Token | window |
Token ID | 4324 |
Feature activation | +4.15e+01 |
Pos logit contributions | |
and | +0.198 |
, | +0.189 |
came | +0.177 |
said | +0.173 |
answered | +0.168 |
Neg logit contributions | |
nam | -0.146 |
ey | -0.144 |
igsaw | -0.144 |
redd | -0.133 |
ifying | -0.129 |
Token | . |
Token ID | 13 |
Feature activation | +6.59e+01 |
Pos logit contributions | |
squeezed | +0.073 |
told | +0.072 |
promised | +0.072 |
listened | +0.072 |
checked | +0.070 |
Neg logit contributions | |
ifying | -0.201 |
rier | -0.197 |
sill | -0.196 |
saw | -0.195 |
ey | -0.188 |
Token | It |
Token ID | 632 |
Feature activation | +5.41e+01 |
Pos logit contributions | |
and | +0.265 |
, | +0.252 |
came | +0.218 |
said | +0.214 |
got | +0.214 |
Neg logit contributions | |
igsaw | -0.282 |
ey | -0.260 |
nam | -0.246 |
saw | -0.246 |
pill | -0.244 |
Token | said |
Token ID | 531 |
Feature activation | +7.66e+01 |
Pos logit contributions | |
, | +0.149 |
and | +0.143 |
; | +0.116 |
owing | +0.110 |
owed | +0.110 |
Neg logit contributions | |
saw | -0.219 |
spec | -0.219 |
nam | -0.218 |
pill | -0.218 |
redd | -0.218 |
Token | " |
Token ID | 366 |
Feature activation | +6.34e+01 |
Pos logit contributions | |
argued | +0.179 |
and | +0.165 |
promised | +0.161 |
got | +0.159 |
squeezed | +0.156 |
Neg logit contributions | |
rier | -0.383 |
igsaw | -0.358 |
ocol | -0.342 |
outer | -0.331 |
disco | -0.330 |
Token | Army |
Token ID | 45272 |
Feature activation | +1.22e+02 |
Pos logit contributions | |
, | +0.409 |
and | +0.392 |
came | +0.337 |
. | +0.333 |
told | +0.332 |
Neg logit contributions | |
Ta | -0.125 |
igsaw | -0.125 |
saw | -0.124 |
pill | -0.120 |
E | -0.120 |
Token | ". |
Token ID | 1911 |
Feature activation | +7.91e+01 |
Pos logit contributions | |
and | +0.359 |
checked | +0.356 |
shook | +0.352 |
said | +0.346 |
felt | +0.341 |
Neg logit contributions | |
ifying | -0.535 |
igsaw | -0.513 |
concession | -0.480 |
rier | -0.480 |
arl | -0.479 |
Token |
|
Token ID | 198 |
Feature activation | +3.94e+01 |
Pos logit contributions | |
and | +0.283 |
, | +0.271 |
came | +0.233 |
got | +0.224 |
woke | +0.223 |
Neg logit contributions | |
igsaw | -0.357 |
ey | -0.336 |
saw | -0.324 |
nam | -0.322 |
pill | -0.315 |
Token |
|
Token ID | 198 |
Feature activation | +3.99e+01 |
Pos logit contributions | |
and | +0.124 |
, | +0.112 |
. | +0.089 |
said | +0.086 |
came | +0.082 |
Neg logit contributions | |
igsaw | -0.223 |
ey | -0.214 |
nam | -0.209 |
redd | -0.203 |
pill | -0.201 |
Token | " |
Token ID | 1 |
Feature activation | +6.83e+01 |
Pos logit contributions | |
and | +0.164 |
, | +0.150 |
. | +0.126 |
said | +0.124 |
came | +0.118 |
Neg logit contributions | |
igsaw | -0.193 |
ey | -0.186 |
nam | -0.175 |
umm | -0.173 |
iqu | -0.173 |
Token | Wow |
Token ID | 22017 |
Feature activation | +4.20e+01 |
Pos logit contributions | |
and | +0.366 |
, | +0.339 |
said | +0.305 |
came | +0.301 |
. | +0.300 |
Neg logit contributions | |
igsaw | -0.229 |
ey | -0.211 |
nam | -0.194 |
iqu | -0.187 |
saw | -0.186 |
Token | and |
Token ID | 290 |
Feature activation | +7.27e+01 |
Pos logit contributions | |
woke | +0.067 |
came | +0.063 |
fought | +0.061 |
listened | +0.061 |
ran | +0.060 |
Neg logit contributions | |
rier | -0.296 |
ju | -0.294 |
rou | -0.294 |
redd | -0.291 |
ifying | -0.282 |
Token | started |
Token ID | 2067 |
Feature activation | +6.34e+01 |
Pos logit contributions | |
and | +0.177 |
, | +0.172 |
. | +0.133 |
owed | +0.122 |
- | +0.117 |
Neg logit contributions | |
nam | -0.367 |
redd | -0.360 |
ju | -0.350 |
ey | -0.337 |
spec | -0.328 |
Token | to |
Token ID | 284 |
Feature activation | +5.50e+01 |
Pos logit contributions | |
and | +0.201 |
, | +0.184 |
said | +0.161 |
came | +0.149 |
answered | +0.143 |
Neg logit contributions | |
igsaw | -0.293 |
ey | -0.287 |
nam | -0.279 |
pill | -0.278 |
rou | -0.278 |
Token | chew |
Token ID | 34722 |
Feature activation | +5.23e+01 |
Pos logit contributions | |
and | +0.234 |
, | +0.216 |
came | +0.193 |
said | +0.190 |
. | +0.181 |
Neg logit contributions | |
ey | -0.208 |
igsaw | -0.205 |
nam | -0.194 |
redd | -0.194 |
ju | -0.181 |
Token | . |
Token ID | 13 |
Feature activation | +6.54e+01 |
Pos logit contributions | |
answered | +0.125 |
rang | +0.118 |
woke | +0.117 |
entered | +0.115 |
said | +0.114 |
Neg logit contributions | |
rier | -0.263 |
redd | -0.252 |
ju | -0.243 |
ocol | -0.243 |
igsaw | -0.242 |
Token | Crunch |
Token ID | 38531 |
Feature activation | +1.09e+02 |
Pos logit contributions | |
and | +0.285 |
, | +0.266 |
came | +0.229 |
said | +0.226 |
. | +0.226 |
Neg logit contributions | |
igsaw | -0.268 |
ey | -0.247 |
nam | -0.231 |
saw | -0.230 |
redd | -0.228 |
Token | , |
Token ID | 11 |
Feature activation | +6.77e+01 |
Pos logit contributions | |
and | +0.266 |
answered | +0.256 |
whispered | +0.256 |
told | +0.255 |
woke | +0.253 |
Neg logit contributions | |
igsaw | -0.543 |
pill | -0.495 |
umm | -0.495 |
rou | -0.488 |
rier | -0.475 |
Token | crunch |
Token ID | 23527 |
Feature activation | +3.92e+01 |
Pos logit contributions | |
, | +0.278 |
and | +0.274 |
. | +0.253 |
owed | +0.249 |
answered | +0.241 |
Neg logit contributions | |
redd | -0.247 |
ju | -0.234 |
ey | -0.230 |
nam | -0.219 |
igsaw | -0.212 |
Token | , |
Token ID | 11 |
Feature activation | +6.87e+01 |
Pos logit contributions | |
read | +0.167 |
whispered | +0.159 |
searched | +0.154 |
told | +0.153 |
and | +0.153 |
Neg logit contributions | |
rou | -0.102 |
igsaw | -0.101 |
rier | -0.096 |
pill | -0.096 |
reating | -0.093 |
Token | he |
Token ID | 339 |
Feature activation | +4.92e+01 |
Pos logit contributions | |
, | +0.179 |
and | +0.163 |
owed | +0.157 |
. | +0.157 |
answered | +0.152 |
Neg logit contributions | |
redd | -0.343 |
ju | -0.332 |
ey | -0.319 |
pill | -0.310 |
nam | -0.295 |
Token | went |
Token ID | 1816 |
Feature activation | +7.57e+01 |
Pos logit contributions | |
, | +0.147 |
and | +0.138 |
; | +0.136 |
- | +0.128 |
answered | +0.124 |
Neg logit contributions | |
ju | -0.184 |
redd | -0.174 |
pill | -0.164 |
spec | -0.155 |
triangular | -0.150 |
Token | But |
Token ID | 887 |
Feature activation | +6.46e+01 |
Pos logit contributions | |
and | +0.253 |
, | +0.239 |
woke | +0.223 |
came | +0.222 |
heard | +0.218 |
Neg logit contributions | |
igsaw | -0.182 |
saw | -0.179 |
umm | -0.174 |
redd | -0.171 |
pill | -0.170 |
Token | they |
Token ID | 484 |
Feature activation | +3.73e+01 |
Pos logit contributions | |
and | +0.180 |
, | +0.153 |
. | +0.127 |
came | +0.123 |
said | +0.121 |
Neg logit contributions | |
igsaw | -0.339 |
nam | -0.334 |
ey | -0.329 |
saw | -0.322 |
redd | -0.318 |
Token | kept |
Token ID | 4030 |
Feature activation | +7.40e+01 |
Pos logit contributions | |
and | +0.107 |
, | +0.100 |
. | +0.085 |
! | +0.083 |
owed | +0.083 |
Neg logit contributions | |
nam | -0.158 |
gol | -0.147 |
microsc | -0.132 |
ju | -0.130 |
redd | -0.130 |
Token | trying |
Token ID | 2111 |
Feature activation | +5.28e+01 |
Pos logit contributions | |
and | +0.303 |
, | +0.283 |
said | +0.236 |
. | +0.236 |
came | +0.234 |
Neg logit contributions | |
igsaw | -0.327 |
ey | -0.312 |
nam | -0.304 |
saw | -0.290 |
redd | -0.287 |
Token | . |
Token ID | 13 |
Feature activation | +5.94e+01 |
Pos logit contributions | |
woke | +0.105 |
said | +0.101 |
came | +0.095 |
gave | +0.092 |
sat | +0.091 |
Neg logit contributions | |
microsc | -0.273 |
igsaw | -0.270 |
nam | -0.270 |
ey | -0.258 |
rou | -0.258 |
Token | Tap |
Token ID | 16880 |
Feature activation | +1.09e+02 |
Pos logit contributions | |
and | +0.267 |
, | +0.254 |
woke | +0.233 |
came | +0.232 |
said | +0.227 |
Neg logit contributions | |
igsaw | -0.203 |
saw | -0.194 |
umm | -0.182 |
pill | -0.181 |
redd | -0.180 |
Token | , |
Token ID | 11 |
Feature activation | +6.18e+01 |
Pos logit contributions | |
said | +0.122 |
answered | +0.114 |
talked | +0.110 |
woke | +0.107 |
and | +0.107 |
Neg logit contributions | |
igsaw | -0.691 |
rou | -0.638 |
venth | -0.638 |
ocol | -0.634 |
amental | -0.629 |
Token | tap |
Token ID | 9814 |
Feature activation | +6.08e+01 |
Pos logit contributions | |
and | +0.281 |
, | +0.269 |
. | +0.235 |
said | +0.234 |
came | +0.231 |
Neg logit contributions | |
igsaw | -0.223 |
ey | -0.219 |
nam | -0.211 |
redd | -0.201 |
saw | -0.200 |
Token | , |
Token ID | 11 |
Feature activation | +6.27e+01 |
Pos logit contributions | |
answered | +0.117 |
promised | +0.107 |
read | +0.107 |
whispered | +0.106 |
woke | +0.103 |
Neg logit contributions | |
spec | -0.300 |
downward | -0.292 |
rou | -0.287 |
wre | -0.272 |
rier | -0.271 |
Token | tap |
Token ID | 9814 |
Feature activation | +6.17e+01 |
Pos logit contributions | |
, | +0.239 |
and | +0.236 |
owed | +0.228 |
answered | +0.228 |
said | +0.221 |
Neg logit contributions | |
ey | -0.220 |
redd | -0.204 |
nam | -0.203 |
igsaw | -0.199 |
saw | -0.193 |
Token | , |
Token ID | 11 |
Feature activation | +6.36e+01 |
Pos logit contributions | |
answered | +0.130 |
woke | +0.125 |
promised | +0.119 |
read | +0.118 |
received | +0.114 |
Neg logit contributions | |
spec | -0.302 |
downward | -0.300 |
rou | -0.295 |
rier | -0.280 |
wre | -0.277 |
Token | and |
Token ID | 290 |
Feature activation | +6.28e+01 |
Pos logit contributions | |
checked | +0.066 |
gave | +0.065 |
woke | +0.062 |
hugged | +0.061 |
held | +0.060 |
Neg logit contributions | |
igsaw | -0.180 |
pill | -0.176 |
ey | -0.171 |
Wed | -0.171 |
rou | -0.169 |
Token | said |
Token ID | 531 |
Feature activation | +6.41e+01 |
Pos logit contributions | |
and | +0.162 |
, | +0.143 |
le | +0.132 |
. | +0.130 |
owed | +0.124 |
Neg logit contributions | |
nam | -0.304 |
rou | -0.264 |
saw | -0.262 |
redd | -0.260 |
ju | -0.257 |
Token | " |
Token ID | 366 |
Feature activation | +5.09e+01 |
Pos logit contributions | |
and | +0.107 |
checked | +0.065 |
said | +0.063 |
whispered | +0.054 |
squeezed | +0.054 |
Neg logit contributions | |
nam | -0.377 |
igsaw | -0.375 |
rier | -0.366 |
redd | -0.359 |
ju | -0.351 |
Token | Tap |
Token ID | 45081 |
Feature activation | +8.69e+01 |
Pos logit contributions | |
, | +0.294 |
and | +0.283 |
. | +0.244 |
: | +0.240 |
said | +0.232 |
Neg logit contributions | |
saw | -0.139 |
redd | -0.136 |
nam | -0.132 |
Oh | -0.129 |
Ta | -0.129 |
Token | ! |
Token ID | 0 |
Feature activation | +6.63e+01 |
Pos logit contributions | |
said | +0.229 |
shook | +0.201 |
answered | +0.199 |
and | +0.199 |
fell | +0.196 |
Neg logit contributions | |
igsaw | -0.480 |
outer | -0.432 |
ifying | -0.402 |
Amen | -0.402 |
amental | -0.400 |
Token | Tap |
Token ID | 16880 |
Feature activation | +1.05e+02 |
Pos logit contributions | |
and | +0.338 |
, | +0.318 |
said | +0.283 |
. | +0.282 |
came | +0.277 |
Neg logit contributions | |
igsaw | -0.226 |
ey | -0.201 |
nam | -0.196 |
saw | -0.195 |
ifying | -0.188 |
Token | ! |
Token ID | 0 |
Feature activation | +6.73e+01 |
Pos logit contributions | |
said | +0.249 |
and | +0.219 |
answered | +0.219 |
agreed | +0.214 |
cried | +0.205 |
Neg logit contributions | |
igsaw | -0.585 |
outer | -0.529 |
amental | -0.523 |
rou | -0.510 |
ifying | -0.506 |
Token | I |
Token ID | 314 |
Feature activation | +3.17e+01 |
Pos logit contributions | |
and | +0.336 |
, | +0.317 |
. | +0.278 |
said | +0.276 |
came | +0.273 |
Neg logit contributions | |
igsaw | -0.238 |
ey | -0.214 |
nam | -0.210 |
saw | -0.207 |
ifying | -0.197 |
Token | found |
Token ID | 1043 |
Feature activation | +6.12e+01 |
Pos logit contributions | |
and | +0.127 |
, | +0.123 |
: | +0.111 |
. | +0.108 |
red | +0.106 |
Neg logit contributions | |
rou | -0.091 |
ey | -0.084 |
igsaw | -0.083 |
spir | -0.083 |
iqu | -0.082 |
Token | you |
Token ID | 345 |
Feature activation | +2.25e+01 |
Pos logit contributions | |
and | +0.281 |
, | +0.255 |
came | +0.246 |
woke | +0.240 |
got | +0.239 |
Neg logit contributions | |
igsaw | -0.205 |
nam | -0.201 |
outer | -0.194 |
microsc | -0.186 |
ey | -0.185 |
Token | !" |
Token ID | 2474 |
Feature activation | +6.27e+01 |
Pos logit contributions | |
shook | +0.076 |
replied | +0.075 |
said | +0.071 |
cried | +0.071 |
explained | +0.070 |
Neg logit contributions | |
ifying | -0.081 |
rou | -0.079 |
outer | -0.073 |
ours | -0.072 |
possible | -0.071 |
Token | a |
Token ID | 257 |
Feature activation | +3.64e+01 |
Pos logit contributions | |
and | +0.222 |
, | +0.204 |
came | +0.178 |
said | +0.177 |
got | +0.173 |
Neg logit contributions | |
igsaw | -0.257 |
ey | -0.246 |
nam | -0.239 |
redd | -0.229 |
outer | -0.228 |
Token | way |
Token ID | 835 |
Feature activation | +7.75e+01 |
Pos logit contributions | |
and | +0.177 |
, | +0.164 |
said | +0.149 |
came | +0.149 |
. | +0.143 |
Neg logit contributions | |
igsaw | -0.120 |
redd | -0.106 |
saw | -0.105 |
ey | -0.103 |
nam | -0.103 |
Token | back |
Token ID | 736 |
Feature activation | +4.21e+01 |
Pos logit contributions | |
and | +0.191 |
, | +0.168 |
said | +0.130 |
came | +0.127 |
. | +0.115 |
Neg logit contributions | |
igsaw | -0.455 |
ey | -0.433 |
nam | -0.421 |
saw | -0.420 |
redd | -0.408 |
Token | . |
Token ID | 13 |
Feature activation | +6.68e+01 |
Pos logit contributions | |
and | +0.126 |
, | +0.115 |
came | +0.114 |
woke | +0.111 |
said | +0.109 |
Neg logit contributions | |
igsaw | -0.202 |
ey | -0.186 |
saw | -0.180 |
outer | -0.176 |
rou | -0.169 |
Token | He |
Token ID | 679 |
Feature activation | +4.45e+01 |
Pos logit contributions | |
and | +0.253 |
, | +0.234 |
. | +0.191 |
said | +0.189 |
came | +0.187 |
Neg logit contributions | |
igsaw | -0.324 |
ey | -0.309 |
nam | -0.299 |
saw | -0.291 |
redd | -0.288 |
Token | died |
Token ID | 3724 |
Feature activation | +1.02e+02 |
Pos logit contributions | |
, | +0.126 |
. | +0.119 |
owed | +0.109 |
and | +0.105 |
," | +0.101 |
Neg logit contributions | |
wre | -0.170 |
rou | -0.165 |
gol | -0.160 |
iam | -0.157 |
spe | -0.156 |
Token | by |
Token ID | 416 |
Feature activation | +4.32e+01 |
Pos logit contributions | |
woke | +0.212 |
came | +0.210 |
read | +0.206 |
answered | +0.202 |
guessed | +0.200 |
Neg logit contributions | |
rier | -0.515 |
iring | -0.483 |
nam | -0.475 |
rou | -0.464 |
ifying | -0.427 |
Token | the |
Token ID | 262 |
Feature activation | +4.60e+01 |
Pos logit contributions | |
and | +0.159 |
, | +0.153 |
came | +0.151 |
said | +0.149 |
got | +0.144 |
Neg logit contributions | |
igsaw | -0.168 |
nam | -0.151 |
Wed | -0.151 |
ey | -0.151 |
outer | -0.147 |
Token | wide |
Token ID | 3094 |
Feature activation | +1.38e+01 |
Pos logit contributions | |
and | +0.246 |
, | +0.238 |
came | +0.218 |
said | +0.212 |
answered | +0.209 |
Neg logit contributions | |
igsaw | -0.116 |
ey | -0.114 |
redd | -0.107 |
concession | -0.107 |
saw | -0.106 |
Token | lake |
Token ID | 13546 |
Feature activation | +6.21e+01 |
Pos logit contributions | |
owed | +0.062 |
listened | +0.060 |
helped | +0.059 |
said | +0.058 |
agreed | +0.058 |
Neg logit contributions | |
camps | -0.040 |
spec | -0.039 |
igsaw | -0.038 |
outer | -0.038 |
rier | -0.037 |
Token | . |
Token ID | 13 |
Feature activation | +7.00e+01 |
Pos logit contributions | |
whispered | +0.084 |
squeezed | +0.080 |
rang | +0.079 |
owed | +0.077 |
repeated | +0.077 |
Neg logit contributions | |
igsaw | -0.330 |
rier | -0.321 |
rou | -0.319 |
spe | -0.317 |
ifying | -0.289 |
Token | King |
Token ID | 2677 |
Feature activation | +4.45e+01 |
Pos logit contributions | |
igsaw | +0.025 |
ey | +0.025 |
nam | +0.022 |
rier | +0.022 |
spec | +0.021 |
Neg logit contributions | |
and | -0.044 |
owed | -0.042 |
, | -0.040 |
said | -0.040 |
checked | -0.039 |
Token | was |
Token ID | 373 |
Feature activation | +5.20e+01 |
Pos logit contributions | |
and | +0.089 |
squeezed | +0.079 |
irus | +0.078 |
, | +0.077 |
woke | +0.074 |
Neg logit contributions | |
pill | -0.199 |
spe | -0.190 |
saw | -0.186 |
spec | -0.184 |
iqu | -0.179 |
Token | cruel |
Token ID | 12177 |
Feature activation | +3.06e+01 |
Pos logit contributions | |
and | +0.216 |
, | +0.199 |
woke | +0.182 |
said | +0.180 |
owed | +0.176 |
Neg logit contributions | |
nam | -0.181 |
igsaw | -0.175 |
ey | -0.171 |
rou | -0.169 |
rier | -0.165 |
Token | and |
Token ID | 290 |
Feature activation | +7.40e+01 |
Pos logit contributions | |
rang | +0.041 |
talked | +0.041 |
squeezed | +0.037 |
owed | +0.036 |
read | +0.036 |
Neg logit contributions | |
rier | -0.161 |
rou | -0.156 |
spec | -0.155 |
reating | -0.152 |
igsaw | -0.151 |
Token | had |
Token ID | 550 |
Feature activation | +5.37e+01 |
Pos logit contributions | |
and | +0.303 |
, | +0.280 |
. | +0.231 |
woke | +0.216 |
owed | +0.213 |
Neg logit contributions | |
nam | -0.307 |
rou | -0.271 |
redd | -0.269 |
ey | -0.266 |
igsaw | -0.266 |
Token | enslaved |
Token ID | 39216 |
Feature activation | +1.00e+02 |
Pos logit contributions | |
and | +0.206 |
, | +0.200 |
owed | +0.167 |
. | +0.145 |
woke | +0.144 |
Neg logit contributions | |
nam | -0.197 |
rou | -0.193 |
redd | -0.193 |
pill | -0.192 |
microsc | -0.189 |
Token | many |
Token ID | 867 |
Feature activation | +4.15e+01 |
Pos logit contributions | |
checked | +0.331 |
shook | +0.327 |
listened | +0.324 |
and | +0.313 |
fell | +0.308 |
Neg logit contributions | |
ifying | -0.381 |
igsaw | -0.374 |
outer | -0.369 |
rier | -0.345 |
acy | -0.342 |
Token | of |
Token ID | 286 |
Feature activation | +2.07e+01 |
Pos logit contributions | |
owed | +0.176 |
and | +0.176 |
replied | +0.170 |
said | +0.170 |
, | +0.166 |
Neg logit contributions | |
pill | -0.129 |
igsaw | -0.127 |
ey | -0.124 |
microsc | -0.121 |
ocol | -0.120 |
Token | the |
Token ID | 262 |
Feature activation | +4.47e+01 |
Pos logit contributions | |
and | +0.079 |
, | +0.076 |
owed | +0.074 |
fell | +0.073 |
said | +0.073 |
Neg logit contributions | |
nam | -0.076 |
igsaw | -0.076 |
microsc | -0.074 |
ey | -0.074 |
ocol | -0.073 |
Token | people |
Token ID | 661 |
Feature activation | +7.07e+01 |
Pos logit contributions | |
and | +0.230 |
, | +0.215 |
owed | +0.203 |
said | +0.199 |
gave | +0.194 |
Neg logit contributions | |
igsaw | -0.129 |
ey | -0.127 |
nam | -0.122 |
microsc | -0.113 |
pill | -0.110 |
Token | of |
Token ID | 286 |
Feature activation | +2.21e+01 |
Pos logit contributions | |
checked | +0.140 |
shook | +0.140 |
woke | +0.135 |
squeezed | +0.135 |
answered | +0.129 |
Neg logit contributions | |
saw | -0.342 |
nam | -0.333 |
igsaw | -0.331 |
pill | -0.324 |
ifying | -0.323 |
Token | door |
Token ID | 3420 |
Feature activation | +3.81e+01 |
Pos logit contributions | |
and | +0.119 |
, | +0.112 |
said | +0.097 |
. | +0.095 |
came | +0.095 |
Neg logit contributions | |
igsaw | -0.092 |
ey | -0.090 |
nam | -0.088 |
saw | -0.083 |
redd | -0.082 |
Token | made |
Token ID | 925 |
Feature activation | +5.40e+01 |
Pos logit contributions | |
anas | +0.119 |
agree | +0.104 |
rs | +0.102 |
and | +0.100 |
ible | +0.095 |
Neg logit contributions | |
saw | -0.144 |
spe | -0.125 |
hatt | -0.121 |
iring | -0.114 |
ey | -0.113 |
Token | a |
Token ID | 257 |
Feature activation | +2.04e+01 |
Pos logit contributions | |
and | +0.218 |
nodded | +0.214 |
agreed | +0.213 |
ate | +0.205 |
, | +0.204 |
Neg logit contributions | |
outer | -0.139 |
saw | -0.127 |
____ | -0.122 |
reating | -0.121 |
iring | -0.120 |
Token | sound |
Token ID | 2128 |
Feature activation | +3.29e+01 |
Pos logit contributions | |
and | +0.093 |
, | +0.088 |
. | +0.075 |
said | +0.073 |
came | +0.071 |
Neg logit contributions | |
igsaw | -0.086 |
ey | -0.080 |
pill | -0.078 |
nam | -0.075 |
saw | -0.074 |
Token | . |
Token ID | 13 |
Feature activation | +5.04e+01 |
Pos logit contributions | |
argued | +0.098 |
anas | +0.096 |
hew | +0.093 |
irus | +0.091 |
nodded | +0.090 |
Neg logit contributions | |
outer | -0.129 |
saw | -0.123 |
reating | -0.115 |
possible | -0.108 |
ifying | -0.106 |
Token | Knock |
Token ID | 29061 |
Feature activation | +9.96e+01 |
Pos logit contributions | |
and | +0.245 |
, | +0.230 |
listened | +0.210 |
came | +0.210 |
promised | +0.209 |
Neg logit contributions | |
igsaw | -0.139 |
nam | -0.131 |
saw | -0.124 |
ey | -0.123 |
outer | -0.113 |
Token | , |
Token ID | 11 |
Feature activation | +5.28e+01 |
Pos logit contributions | |
agreed | +0.183 |
checked | +0.179 |
argued | +0.176 |
promised | +0.172 |
talked | +0.170 |
Neg logit contributions | |
igsaw | -0.521 |
outer | -0.481 |
Amen | -0.471 |
nam | -0.465 |
ifying | -0.463 |
Token | knock |
Token ID | 10643 |
Feature activation | +4.54e+01 |
Pos logit contributions | |
, | +0.190 |
and | +0.181 |
owed | +0.165 |
said | +0.160 |
answered | +0.160 |
Neg logit contributions | |
nam | -0.217 |
ey | -0.203 |
saw | -0.192 |
redd | -0.188 |
ifying | -0.186 |
Token | , |
Token ID | 11 |
Feature activation | +5.37e+01 |
Pos logit contributions | |
anas | +0.149 |
age | +0.143 |
searched | +0.143 |
watered | +0.134 |
comb | +0.133 |
Neg logit contributions | |
saw | -0.148 |
yourselves | -0.138 |
rou | -0.129 |
reating | -0.126 |
concession | -0.123 |
Token | knock |
Token ID | 10643 |
Feature activation | +4.63e+01 |
Pos logit contributions | |
, | +0.145 |
owed | +0.141 |
red | +0.129 |
and | +0.128 |
answered | +0.125 |
Neg logit contributions | |
nam | -0.248 |
ey | -0.242 |
redd | -0.231 |
saw | -0.227 |
ifying | -0.222 |
Token | . |
Token ID | 13 |
Feature activation | +5.33e+01 |
Pos logit contributions | |
anas | +0.177 |
agree | +0.170 |
searched | +0.167 |
owed | +0.166 |
age | +0.166 |
Neg logit contributions | |
saw | -0.134 |
yourselves | -0.126 |
reating | -0.119 |
nam | -0.116 |
ifying | -0.114 |
Token | went |
Token ID | 1816 |
Feature activation | +7.34e+01 |
Pos logit contributions | |
spe | +0.000 |
spec | +0.000 |
rou | +0.000 |
iam | +0.000 |
acy | +0.000 |
Neg logit contributions | |
," | -0.000 |
either | -0.000 |
or | -0.000 |
, | -0.000 |
. | -0.000 |
Token | to |
Token ID | 284 |
Feature activation | +5.69e+01 |
Pos logit contributions | |
and | +0.209 |
, | +0.192 |
said | +0.166 |
came | +0.159 |
woke | +0.155 |
Neg logit contributions | |
igsaw | -0.375 |
saw | -0.356 |
ey | -0.350 |
nam | -0.341 |
outer | -0.331 |
Token | the |
Token ID | 262 |
Feature activation | +4.43e+01 |
Pos logit contributions | |
and | +0.206 |
, | +0.191 |
came | +0.158 |
said | +0.156 |
. | +0.153 |
Neg logit contributions | |
igsaw | -0.275 |
ey | -0.262 |
nam | -0.256 |
saw | -0.245 |
outer | -0.241 |
Token | park |
Token ID | 3952 |
Feature activation | +5.37e+01 |
Pos logit contributions | |
and | +0.220 |
, | +0.207 |
came | +0.180 |
. | +0.178 |
said | +0.177 |
Neg logit contributions | |
igsaw | -0.155 |
ey | -0.147 |
saw | -0.142 |
nam | -0.140 |
redd | -0.136 |
Token | and |
Token ID | 290 |
Feature activation | +7.68e+01 |
Pos logit contributions | |
died | +0.097 |
woke | +0.096 |
checked | +0.094 |
lied | +0.086 |
fell | +0.086 |
Neg logit contributions | |
igsaw | -0.272 |
saw | -0.266 |
nam | -0.252 |
dining | -0.244 |
spir | -0.243 |
Token | performed |
Token ID | 6157 |
Feature activation | +9.96e+01 |
Pos logit contributions | |
and | +0.189 |
, | +0.165 |
. | +0.144 |
! | +0.117 |
- | +0.109 |
Neg logit contributions | |
nam | -0.399 |
saw | -0.386 |
igsaw | -0.371 |
microsc | -0.360 |
outer | -0.352 |
Token | for |
Token ID | 329 |
Feature activation | +5.70e+01 |
Pos logit contributions | |
and | +0.349 |
woke | +0.328 |
came | +0.326 |
got | +0.318 |
checked | +0.316 |
Neg logit contributions | |
igsaw | -0.427 |
nam | -0.388 |
ey | -0.384 |
outer | -0.380 |
downward | -0.371 |
Token | a |
Token ID | 257 |
Feature activation | +4.00e+01 |
Pos logit contributions | |
and | +0.224 |
, | +0.205 |
came | +0.182 |
said | +0.181 |
went | +0.173 |
Neg logit contributions | |
igsaw | -0.249 |
nam | -0.235 |
ey | -0.228 |
outer | -0.223 |
microsc | -0.218 |
Token | long |
Token ID | 890 |
Feature activation | +8.64e+00 |
Pos logit contributions | |
and | +0.201 |
, | +0.183 |
came | +0.167 |
said | +0.167 |
. | +0.160 |
Neg logit contributions | |
igsaw | -0.141 |
nam | -0.118 |
ey | -0.114 |
saw | -0.113 |
outer | -0.110 |
Token | time |
Token ID | 640 |
Feature activation | +5.20e+01 |
Pos logit contributions | |
and | +0.038 |
, | +0.034 |
said | +0.034 |
came | +0.033 |
gave | +0.033 |
Neg logit contributions | |
igsaw | -0.033 |
ey | -0.027 |
Wed | -0.026 |
gol | -0.024 |
outer | -0.024 |
Token | . |
Token ID | 13 |
Feature activation | +7.04e+01 |
Pos logit contributions | |
woke | +0.111 |
promised | +0.094 |
checked | +0.090 |
said | +0.089 |
shook | +0.087 |
Neg logit contributions | |
igsaw | -0.279 |
outer | -0.259 |
rou | -0.256 |
possible | -0.253 |
spec | -0.252 |
Token | Ben |
Token ID | 11696 |
Feature activation | +4.11e+01 |
Pos logit contributions | |
and | +0.152 |
, | +0.138 |
. | +0.115 |
said | +0.107 |
came | +0.095 |
Neg logit contributions | |
igsaw | -0.174 |
nam | -0.162 |
umm | -0.160 |
iqu | -0.156 |
ey | -0.151 |
Token | took |
Token ID | 1718 |
Feature activation | +7.27e+01 |
Pos logit contributions | |
. | +0.100 |
," | +0.097 |
because | +0.096 |
." | +0.092 |
, | +0.089 |
Neg logit contributions | |
ey | -0.163 |
pill | -0.150 |
rou | -0.149 |
spec | -0.149 |
amental | -0.143 |
Token | the |
Token ID | 262 |
Feature activation | +4.25e+01 |
Pos logit contributions | |
and | +0.214 |
, | +0.199 |
. | +0.150 |
came | +0.137 |
said | +0.133 |
Neg logit contributions | |
igsaw | -0.438 |
ey | -0.432 |
nam | -0.420 |
iqu | -0.412 |
pill | -0.410 |
Token | stick |
Token ID | 4859 |
Feature activation | +6.38e+01 |
Pos logit contributions | |
and | +0.167 |
, | +0.156 |
. | +0.128 |
said | +0.126 |
came | +0.125 |
Neg logit contributions | |
igsaw | -0.204 |
ey | -0.192 |
nam | -0.186 |
saw | -0.182 |
pill | -0.179 |
Token | and |
Token ID | 290 |
Feature activation | +7.49e+01 |
Pos logit contributions | |
came | +0.047 |
said | +0.046 |
woke | +0.045 |
and | +0.043 |
, | +0.042 |
Neg logit contributions | |
igsaw | -0.417 |
ey | -0.398 |
saw | -0.396 |
redd | -0.382 |
nam | -0.382 |
Token | tapped |
Token ID | 22499 |
Feature activation | +9.94e+01 |
Pos logit contributions | |
and | +0.177 |
, | +0.166 |
. | +0.135 |
- | +0.109 |
woke | +0.108 |
Neg logit contributions | |
nam | -0.395 |
ey | -0.385 |
igsaw | -0.385 |
saw | -0.380 |
redd | -0.371 |
Token | the |
Token ID | 262 |
Feature activation | +4.43e+01 |
Pos logit contributions | |
talked | +0.384 |
rang | +0.376 |
listened | +0.366 |
told | +0.363 |
woke | +0.362 |
Neg logit contributions | |
igsaw | -0.290 |
redd | -0.288 |
rier | -0.281 |
ey | -0.281 |
ifying | -0.280 |
Token | ac |
Token ID | 936 |
Feature activation | +1.35e+01 |
Pos logit contributions | |
and | +0.180 |
, | +0.168 |
said | +0.139 |
. | +0.138 |
came | +0.138 |
Neg logit contributions | |
igsaw | -0.197 |
ey | -0.190 |
nam | -0.182 |
redd | -0.177 |
saw | -0.176 |
Token | orn |
Token ID | 1211 |
Feature activation | +2.16e+01 |
Pos logit contributions | |
and | +0.065 |
, | +0.060 |
. | +0.052 |
came | +0.049 |
said | +0.048 |
Neg logit contributions | |
igsaw | -0.056 |
nam | -0.055 |
ey | -0.051 |
iqu | -0.050 |
Amen | -0.049 |
Token | with |
Token ID | 351 |
Feature activation | +6.55e+01 |
Pos logit contributions | |
talked | +0.050 |
woke | +0.048 |
listened | +0.046 |
discussed | +0.046 |
told | +0.045 |
Neg logit contributions | |
rier | -0.102 |
redd | -0.092 |
saw | -0.091 |
nam | -0.091 |
downward | -0.089 |
Token | it |
Token ID | 340 |
Feature activation | +5.89e+01 |
Pos logit contributions | |
and | +0.218 |
, | +0.199 |
. | +0.157 |
said | +0.155 |
came | +0.154 |
Neg logit contributions | |
igsaw | -0.347 |
ey | -0.332 |
nam | -0.322 |
saw | -0.314 |
pill | -0.311 |
Token | as |
Token ID | 355 |
Feature activation | +5.31e+01 |
Pos logit contributions | |
owed | +0.201 |
, | +0.174 |
woke | +0.167 |
came | +0.160 |
answered | +0.160 |
Neg logit contributions | |
nam | -0.257 |
ey | -0.255 |
ju | -0.250 |
redd | -0.248 |
rou | -0.241 |
Token | it |
Token ID | 340 |
Feature activation | +5.31e+01 |
Pos logit contributions | |
and | +0.200 |
, | +0.186 |
. | +0.151 |
said | +0.148 |
came | +0.147 |
Neg logit contributions | |
igsaw | -0.261 |
ey | -0.249 |
nam | -0.238 |
saw | -0.232 |
pill | -0.232 |
Token | slowly |
Token ID | 6364 |
Feature activation | +5.76e+01 |
Pos logit contributions | |
and | +0.149 |
, | +0.135 |
," | +0.116 |
!" | +0.108 |
anas | +0.102 |
Neg logit contributions | |
iqu | -0.218 |
spec | -0.215 |
pill | -0.214 |
ju | -0.212 |
deepest | -0.212 |
Token | began |
Token ID | 2540 |
Feature activation | +6.21e+01 |
Pos logit contributions | |
anas | +0.147 |
!" | +0.146 |
?" | +0.145 |
irus | +0.129 |
and | +0.125 |
Neg logit contributions | |
spec | -0.234 |
ubb | -0.209 |
pill | -0.206 |
gol | -0.203 |
ey | -0.201 |
Token | to |
Token ID | 284 |
Feature activation | +5.46e+01 |
Pos logit contributions | |
woke | +0.191 |
and | +0.185 |
said | +0.182 |
ate | +0.173 |
gave | +0.173 |
Neg logit contributions | |
microsc | -0.239 |
rou | -0.232 |
iqu | -0.231 |
igsaw | -0.229 |
ey | -0.228 |
Token | dawn |
Token ID | 17577 |
Feature activation | +9.92e+01 |
Pos logit contributions | |
and | +0.238 |
ed | +0.235 |
, | +0.218 |
es | +0.217 |
owed | +0.217 |
Neg logit contributions | |
outer | -0.189 |
ey | -0.168 |
comparison | -0.158 |
igsaw | -0.157 |
redd | -0.153 |
Token | on |
Token ID | 319 |
Feature activation | +5.41e+01 |
Pos logit contributions | |
told | +0.190 |
and | +0.182 |
said | +0.178 |
promised | +0.177 |
listened | +0.176 |
Neg logit contributions | |
igsaw | -0.546 |
rier | -0.524 |
ifying | -0.504 |
pill | -0.499 |
spec | -0.499 |
Token | him |
Token ID | 683 |
Feature activation | +3.93e+01 |
Pos logit contributions | |
owed | +0.238 |
said | +0.223 |
agreed | +0.218 |
gave | +0.217 |
whispered | +0.216 |
Neg logit contributions | |
igsaw | -0.173 |
outer | -0.165 |
microsc | -0.156 |
Wed | -0.154 |
nam | -0.148 |
Token | that |
Token ID | 326 |
Feature activation | +5.46e+01 |
Pos logit contributions | |
checked | +0.099 |
woke | +0.089 |
listened | +0.089 |
promised | +0.088 |
shook | +0.086 |
Neg logit contributions | |
igsaw | -0.172 |
microsc | -0.170 |
rou | -0.166 |
iqu | -0.165 |
ifying | -0.161 |
Token | he |
Token ID | 339 |
Feature activation | +4.78e+01 |
Pos logit contributions | |
and | +0.232 |
owed | +0.222 |
, | +0.202 |
red | +0.195 |
hugged | +0.191 |
Neg logit contributions | |
nam | -0.184 |
microsc | -0.175 |
outer | -0.167 |
igsaw | -0.166 |
ey | -0.162 |
Token | was |
Token ID | 373 |
Feature activation | +5.48e+01 |
Pos logit contributions | |
, | +0.144 |
and | +0.136 |
owed | +0.105 |
but | +0.105 |
. | +0.104 |
Neg logit contributions | |
pill | -0.195 |
rou | -0.186 |
spec | -0.175 |
iqu | -0.174 |
spe | -0.171 |
Token | the |
Token ID | 262 |
Feature activation | +3.69e+01 |
Pos logit contributions | |
and | +0.165 |
, | +0.152 |
said | +0.128 |
came | +0.128 |
. | +0.120 |
Neg logit contributions | |
igsaw | -0.236 |
ey | -0.221 |
nam | -0.220 |
redd | -0.209 |
saw | -0.209 |
Token | ground |
Token ID | 2323 |
Feature activation | +4.32e+01 |
Pos logit contributions | |
and | +0.168 |
, | +0.158 |
came | +0.138 |
said | +0.137 |
. | +0.132 |
Neg logit contributions | |
igsaw | -0.139 |
ey | -0.131 |
nam | -0.131 |
saw | -0.126 |
redd | -0.123 |
Token | that |
Token ID | 326 |
Feature activation | +4.92e+01 |
Pos logit contributions | |
promised | +0.087 |
woke | +0.079 |
checked | +0.077 |
listened | +0.074 |
remembered | +0.074 |
Neg logit contributions | |
saw | -0.209 |
rou | -0.195 |
ey | -0.194 |
ifying | -0.192 |
igsaw | -0.188 |
Token | said |
Token ID | 531 |
Feature activation | +7.07e+01 |
Pos logit contributions | |
and | +0.149 |
, | +0.126 |
owed | +0.092 |
red | +0.089 |
! | +0.087 |
Neg logit contributions | |
nam | -0.259 |
redd | -0.227 |
ju | -0.215 |
spec | -0.209 |
saw | -0.208 |
Token | " |
Token ID | 366 |
Feature activation | +5.75e+01 |
Pos logit contributions | |
and | +0.233 |
promised | +0.196 |
argued | +0.193 |
talked | +0.192 |
cried | +0.191 |
Neg logit contributions | |
igsaw | -0.287 |
microsc | -0.262 |
rier | -0.260 |
nam | -0.258 |
redd | -0.250 |
Token | short |
Token ID | 19509 |
Feature activation | +9.91e+01 |
Pos logit contributions | |
, | +0.354 |
and | +0.342 |
: | +0.287 |
. | +0.287 |
told | +0.277 |
Neg logit contributions | |
E | -0.132 |
Oh | -0.130 |
igsaw | -0.130 |
nam | -0.128 |
pill | -0.127 |
Token | cut |
Token ID | 8968 |
Feature activation | -1.85e+01 |
Pos logit contributions | |
said | +0.394 |
and | +0.391 |
felt | +0.390 |
cried | +0.389 |
talked | +0.386 |
Neg logit contributions | |
igsaw | -0.383 |
ifying | -0.354 |
arl | -0.332 |
rier | -0.325 |
gol | -0.322 |
Token | ". |
Token ID | 1911 |
Feature activation | +7.36e+01 |
Pos logit contributions | |
rier | +0.056 |
umm | +0.046 |
igsaw | +0.045 |
gol | +0.045 |
ifying | +0.044 |
Neg logit contributions | |
and | -0.084 |
said | -0.084 |
looked | -0.083 |
checked | -0.083 |
searched | -0.082 |
Token | She |
Token ID | 1375 |
Feature activation | +5.10e+01 |
Pos logit contributions | |
and | +0.332 |
, | +0.306 |
listened | +0.288 |
said | +0.284 |
came | +0.283 |
Neg logit contributions | |
igsaw | -0.262 |
ey | -0.215 |
nam | -0.209 |
pill | -0.200 |
Wed | -0.194 |
Token | thought |
Token ID | 1807 |
Feature activation | +7.42e+01 |
Pos logit contributions | |
, | +0.124 |
. | +0.116 |
and | +0.108 |
," | +0.097 |
! | +0.086 |
Neg logit contributions | |
Isa | -0.224 |
pill | -0.220 |
amental | -0.217 |
spec | -0.211 |
saw | -0.207 |
Token | it |
Token ID | 340 |
Feature activation | +5.44e+01 |
Pos logit contributions | |
argued | +0.285 |
red | +0.282 |
owed | +0.276 |
approached | +0.275 |
overheard | +0.272 |
Neg logit contributions | |
igsaw | -0.217 |
pill | -0.190 |
math | -0.186 |
ocol | -0.184 |
possible | -0.183 |
Token | late |
Token ID | 2739 |
Feature activation | +1.72e+01 |
Pos logit contributions | |
and | +0.190 |
, | +0.183 |
came | +0.166 |
said | +0.163 |
. | +0.160 |
Neg logit contributions | |
igsaw | -0.087 |
ey | -0.075 |
nam | -0.074 |
redd | -0.073 |
rou | -0.069 |
Token | . |
Token ID | 13 |
Feature activation | +6.31e+01 |
Pos logit contributions | |
woke | +0.031 |
checked | +0.031 |
gave | +0.030 |
got | +0.030 |
talked | +0.028 |
Neg logit contributions | |
igsaw | -0.085 |
rier | -0.084 |
rou | -0.081 |
possible | -0.080 |
Lorenzo | -0.078 |
Token | The |
Token ID | 383 |
Feature activation | +5.62e+01 |
Pos logit contributions | |
and | +0.258 |
, | +0.240 |
came | +0.203 |
said | +0.201 |
. | +0.199 |
Neg logit contributions | |
igsaw | -0.274 |
nam | -0.254 |
ey | -0.251 |
pill | -0.241 |
redd | -0.240 |
Token | bear |
Token ID | 6842 |
Feature activation | +5.32e+01 |
Pos logit contributions | |
and | +0.225 |
, | +0.209 |
. | +0.173 |
said | +0.171 |
came | +0.170 |
Neg logit contributions | |
igsaw | -0.262 |
ey | -0.249 |
nam | -0.240 |
saw | -0.233 |
redd | -0.231 |
Token | had |
Token ID | 550 |
Feature activation | +5.27e+01 |
Pos logit contributions | |
owed | +0.126 |
agree | +0.125 |
rs | +0.122 |
, | +0.119 |
. | +0.114 |
Neg logit contributions | |
pill | -0.180 |
rou | -0.177 |
Mic | -0.172 |
spe | -0.167 |
ju | -0.158 |
Token | died |
Token ID | 3724 |
Feature activation | +9.91e+01 |
Pos logit contributions | |
and | +0.192 |
owed | +0.187 |
, | +0.187 |
le | +0.167 |
agree | +0.151 |
Neg logit contributions | |
ju | -0.178 |
pill | -0.177 |
Wed | -0.174 |
rou | -0.173 |
redd | -0.173 |
Token | . |
Token ID | 13 |
Feature activation | +6.54e+01 |
Pos logit contributions | |
agree | +0.212 |
agreed | +0.206 |
guessed | +0.201 |
read | +0.200 |
counted | +0.199 |
Neg logit contributions | |
rier | -0.498 |
nam | -0.483 |
rou | -0.459 |
iring | -0.446 |
ifying | -0.435 |
Token |
|
Token ID | 198 |
Feature activation | +3.67e+01 |
Pos logit contributions | |
and | +0.256 |
, | +0.240 |
came | +0.199 |
. | +0.196 |
said | +0.194 |
Neg logit contributions | |
igsaw | -0.292 |
ey | -0.272 |
nam | -0.267 |
pill | -0.262 |
saw | -0.257 |
Token |
|
Token ID | 198 |
Feature activation | +3.72e+01 |
Pos logit contributions | |
and | +0.127 |
, | +0.115 |
. | +0.093 |
said | +0.092 |
came | +0.089 |
Neg logit contributions | |
igsaw | -0.194 |
ey | -0.187 |
nam | -0.183 |
redd | -0.177 |
saw | -0.174 |
Token | The |
Token ID | 464 |
Feature activation | +4.82e+01 |
Pos logit contributions | |
and | +0.175 |
, | +0.161 |
. | +0.139 |
said | +0.138 |
came | +0.135 |
Neg logit contributions | |
igsaw | -0.153 |
ey | -0.149 |
nam | -0.142 |
redd | -0.135 |
iqu | -0.133 |
Token | little |
Token ID | 1310 |
Feature activation | +1.11e+01 |
Pos logit contributions | |
and | +0.195 |
, | +0.181 |
. | +0.148 |
said | +0.147 |
came | +0.145 |
Neg logit contributions | |
igsaw | -0.233 |
ey | -0.217 |
nam | -0.210 |
saw | -0.205 |
pill | -0.204 |
Token | and |
Token ID | 290 |
Feature activation | +7.17e+01 |
Pos logit contributions | |
checked | +0.111 |
shook | +0.107 |
came | +0.107 |
looked | +0.106 |
walked | +0.106 |
Neg logit contributions | |
outer | -0.141 |
rier | -0.138 |
rou | -0.135 |
ifying | -0.134 |
possible | -0.131 |
Token | find |
Token ID | 1064 |
Feature activation | +5.70e+01 |
Pos logit contributions | |
and | +0.246 |
, | +0.229 |
. | +0.183 |
came | +0.181 |
said | +0.173 |
Neg logit contributions | |
nam | -0.337 |
ey | -0.322 |
igsaw | -0.314 |
saw | -0.308 |
redd | -0.298 |
Token | out |
Token ID | 503 |
Feature activation | +3.68e+01 |
Pos logit contributions | |
and | +0.225 |
, | +0.206 |
got | +0.186 |
came | +0.185 |
went | +0.182 |
Neg logit contributions | |
outer | -0.204 |
ey | -0.197 |
nam | -0.196 |
igsaw | -0.194 |
ocol | -0.190 |
Token | why |
Token ID | 1521 |
Feature activation | +5.35e+01 |
Pos logit contributions | |
and | +0.143 |
shook | +0.133 |
, | +0.131 |
came | +0.131 |
fell | +0.130 |
Neg logit contributions | |
outer | -0.129 |
nam | -0.123 |
rou | -0.123 |
saw | -0.120 |
igsaw | -0.120 |
Token | it |
Token ID | 340 |
Feature activation | +5.48e+01 |
Pos logit contributions | |
and | +0.229 |
, | +0.194 |
checked | +0.193 |
searched | +0.192 |
owed | +0.192 |
Neg logit contributions | |
nam | -0.181 |
possible | -0.163 |
outer | -0.158 |
saw | -0.155 |
igsaw | -0.151 |
Token | died |
Token ID | 3724 |
Feature activation | +9.91e+01 |
Pos logit contributions | |
and | +0.189 |
, | +0.183 |
. | +0.146 |
- | +0.140 |
answered | +0.139 |
Neg logit contributions | |
pill | -0.222 |
iqu | -0.212 |
spec | -0.209 |
ey | -0.198 |
redd | -0.194 |
Token | ." |
Token ID | 526 |
Feature activation | +6.99e+01 |
Pos logit contributions | |
answered | +0.329 |
talked | +0.329 |
walked | +0.328 |
agreed | +0.324 |
handed | +0.324 |
Neg logit contributions | |
rier | -0.409 |
possible | -0.369 |
nam | -0.339 |
outer | -0.338 |
ifying | -0.330 |
Token |
|
Token ID | 198 |
Feature activation | +3.67e+01 |
Pos logit contributions | |
and | +0.267 |
, | +0.252 |
came | +0.223 |
gave | +0.218 |
got | +0.215 |
Neg logit contributions | |
igsaw | -0.296 |
ey | -0.289 |
saw | -0.266 |
pill | -0.257 |
Deal | -0.251 |
Token |
|
Token ID | 198 |
Feature activation | +3.72e+01 |
Pos logit contributions | |
and | +0.120 |
, | +0.110 |
. | +0.086 |
said | +0.085 |
came | +0.083 |
Neg logit contributions | |
igsaw | -0.199 |
ey | -0.190 |
nam | -0.186 |
redd | -0.179 |
saw | -0.179 |
Token | The |
Token ID | 464 |
Feature activation | +4.82e+01 |
Pos logit contributions | |
and | +0.198 |
, | +0.186 |
. | +0.162 |
said | +0.162 |
came | +0.160 |
Neg logit contributions | |
igsaw | -0.126 |
ey | -0.117 |
nam | -0.114 |
pill | -0.106 |
redd | -0.105 |
Token | teacher |
Token ID | 4701 |
Feature activation | +6.29e+01 |
Pos logit contributions | |
and | +0.183 |
, | +0.170 |
. | +0.140 |
said | +0.139 |
came | +0.136 |
Neg logit contributions | |
igsaw | -0.249 |
ey | -0.231 |
ifying | -0.230 |
pill | -0.229 |
nam | -0.228 |
Token | to |
Token ID | 284 |
Feature activation | +5.50e+01 |
Pos logit contributions | |
and | +0.187 |
, | +0.172 |
woke | +0.171 |
fell | +0.169 |
came | +0.169 |
Neg logit contributions | |
ey | -0.378 |
redd | -0.363 |
nam | -0.361 |
igsaw | -0.349 |
saw | -0.348 |
Token | it |
Token ID | 340 |
Feature activation | +5.54e+01 |
Pos logit contributions | |
and | +0.186 |
, | +0.173 |
said | +0.137 |
came | +0.137 |
. | +0.136 |
Neg logit contributions | |
igsaw | -0.272 |
ey | -0.266 |
nam | -0.260 |
saw | -0.255 |
redd | -0.251 |
Token | and |
Token ID | 290 |
Feature activation | +7.45e+01 |
Pos logit contributions | |
fought | +0.144 |
died | +0.129 |
argued | +0.113 |
woke | +0.109 |
drowned | +0.106 |
Neg logit contributions | |
downward | -0.256 |
iring | -0.247 |
outer | -0.240 |
redd | -0.229 |
deepest | -0.224 |
Token | said |
Token ID | 531 |
Feature activation | +7.57e+01 |
Pos logit contributions | |
and | +0.173 |
, | +0.157 |
. | +0.137 |
ides | +0.133 |
ops | +0.127 |
Neg logit contributions | |
nam | -0.397 |
ju | -0.333 |
redd | -0.327 |
saw | -0.324 |
spec | -0.313 |
Token | ' |
Token ID | 705 |
Feature activation | +6.47e+01 |
Pos logit contributions | |
and | +0.073 |
said | +0.018 |
, | +0.007 |
gave | +0.004 |
came | +0.001 |
Neg logit contributions | |
igsaw | -0.528 |
nam | -0.518 |
rier | -0.503 |
ey | -0.502 |
redd | -0.498 |
Token | sun |
Token ID | 19155 |
Feature activation | +9.89e+01 |
Pos logit contributions | |
and | +0.374 |
, | +0.366 |
. | +0.301 |
: | +0.297 |
said | +0.296 |
Neg logit contributions | |
saw | -0.164 |
igsaw | -0.161 |
nam | -0.161 |
redd | -0.156 |
ifying | -0.153 |
Token | '. |
Token ID | 4458 |
Feature activation | +6.79e+01 |
Pos logit contributions | |
hid | +0.365 |
checked | +0.357 |
and | +0.357 |
fell | +0.347 |
cried | +0.344 |
Neg logit contributions | |
igsaw | -0.407 |
pill | -0.388 |
ifying | -0.378 |
Wed | -0.353 |
saw | -0.352 |
Token | Then |
Token ID | 3244 |
Feature activation | +6.11e+01 |
Pos logit contributions | |
and | +0.345 |
, | +0.312 |
gave | +0.294 |
came | +0.290 |
told | +0.283 |
Neg logit contributions | |
ey | -0.181 |
igsaw | -0.177 |
saw | -0.171 |
spec | -0.158 |
microsc | -0.156 |
Token | Mike |
Token ID | 4995 |
Feature activation | +4.92e+01 |
Pos logit contributions | |
and | +0.119 |
, | +0.071 |
woke | +0.068 |
came | +0.066 |
gave | +0.066 |
Neg logit contributions | |
nam | -0.343 |
igsaw | -0.334 |
ey | -0.329 |
spec | -0.327 |
saw | -0.326 |
Token | ran |
Token ID | 4966 |
Feature activation | +7.84e+01 |
Pos logit contributions | |
rs | +0.117 |
because | +0.113 |
than | +0.108 |
anas | +0.106 |
, | +0.104 |
Neg logit contributions | |
saw | -0.188 |
rou | -0.179 |
pill | -0.167 |
iam | -0.164 |
iqu | -0.163 |
Token | off |
Token ID | 572 |
Feature activation | +5.11e+01 |
Pos logit contributions | |
and | +0.210 |
, | +0.190 |
said | +0.143 |
. | +0.141 |
came | +0.140 |
Neg logit contributions | |
igsaw | -0.456 |
ey | -0.435 |
nam | -0.422 |
saw | -0.418 |
redd | -0.410 |
Token | to |
Token ID | 284 |
Feature activation | +5.55e+01 |
Pos logit contributions | |
and | +0.265 |
owed | +0.246 |
, | +0.231 |
hed | +0.224 |
replied | +0.215 |
Neg logit contributions | |
microsc | -0.260 |
nam | -0.260 |
outer | -0.257 |
backup | -0.256 |
possible | -0.247 |
Token | help |
Token ID | 1037 |
Feature activation | +6.14e+01 |
Pos logit contributions | |
and | +0.228 |
, | +0.203 |
said | +0.176 |
answered | +0.168 |
came | +0.167 |
Neg logit contributions | |
igsaw | -0.199 |
ey | -0.194 |
Amen | -0.191 |
saw | -0.182 |
ê | -0.180 |
Token | . |
Token ID | 13 |
Feature activation | +6.59e+01 |
Pos logit contributions | |
checked | +0.160 |
answered | +0.159 |
woke | +0.158 |
talked | +0.158 |
owed | +0.155 |
Neg logit contributions | |
nam | -0.250 |
rier | -0.241 |
wre | -0.236 |
spec | -0.234 |
saw | -0.226 |
Token | The |
Token ID | 383 |
Feature activation | +5.90e+01 |
Pos logit contributions | |
and | +0.299 |
owed | +0.278 |
, | +0.276 |
came | +0.275 |
woke | +0.271 |
Neg logit contributions | |
igsaw | -0.173 |
pill | -0.173 |
ju | -0.155 |
redd | -0.154 |
saw | -0.151 |
Token | spirit |
Token ID | 4437 |
Feature activation | +4.81e+01 |
Pos logit contributions | |
and | +0.236 |
, | +0.219 |
. | +0.177 |
came | +0.170 |
went | +0.169 |
Neg logit contributions | |
igsaw | -0.300 |
ey | -0.282 |
pill | -0.272 |
rou | -0.270 |
saw | -0.267 |
Token | served |
Token ID | 4983 |
Feature activation | +9.87e+01 |
Pos logit contributions | |
anas | +0.128 |
ops | +0.116 |
ides | +0.112 |
irus | +0.110 |
oor | +0.110 |
Neg logit contributions | |
igator | -0.170 |
spe | -0.161 |
gol | -0.151 |
worm | -0.150 |
hatt | -0.150 |
Token | the |
Token ID | 262 |
Feature activation | +4.52e+01 |
Pos logit contributions | |
rang | +0.412 |
owed | +0.393 |
fell | +0.388 |
and | +0.384 |
checked | +0.381 |
Neg logit contributions | |
nam | -0.244 |
microsc | -0.235 |
wre | -0.232 |
igsaw | -0.222 |
redd | -0.217 |
Token | rabbit |
Token ID | 22746 |
Feature activation | +5.52e+01 |
Pos logit contributions | |
and | +0.212 |
, | +0.202 |
. | +0.176 |
said | +0.176 |
came | +0.175 |
Neg logit contributions | |
igsaw | -0.157 |
ey | -0.149 |
nam | -0.146 |
redd | -0.139 |
pill | -0.137 |
Token | some |
Token ID | 617 |
Feature activation | +3.55e+01 |
Pos logit contributions | |
rang | +0.152 |
came | +0.146 |
said | +0.144 |
repeated | +0.143 |
answered | +0.141 |
Neg logit contributions | |
igsaw | -0.228 |
nam | -0.199 |
redd | -0.199 |
ju | -0.192 |
Wed | -0.192 |
Token | y |
Token ID | 331 |
Feature activation | +2.01e+01 |
Pos logit contributions | |
and | +0.161 |
came | +0.156 |
, | +0.155 |
listened | +0.155 |
counted | +0.151 |
Neg logit contributions | |
ey | -0.095 |
redd | -0.089 |
ocol | -0.087 |
pill | -0.086 |
igsaw | -0.086 |
Token | ummy |
Token ID | 13513 |
Feature activation | +1.41e+01 |
Pos logit contributions | |
and | +0.107 |
, | +0.104 |
. | +0.090 |
came | +0.083 |
! | +0.081 |
Neg logit contributions | |
nam | -0.071 |
igsaw | -0.067 |
saw | -0.066 |
ey | -0.063 |
redd | -0.061 |
Token | the |
Token ID | 262 |
Feature activation | +4.25e+01 |
Pos logit contributions | |
and | +0.242 |
, | +0.229 |
. | +0.190 |
said | +0.178 |
came | +0.178 |
Neg logit contributions | |
igsaw | -0.260 |
nam | -0.254 |
ey | -0.252 |
redd | -0.232 |
pill | -0.229 |
Token | tied |
Token ID | 8165 |
Feature activation | +7.08e+01 |
Pos logit contributions | |
and | +0.209 |
, | +0.201 |
woke | +0.180 |
came | +0.178 |
fell | +0.174 |
Neg logit contributions | |
igsaw | -0.135 |
redd | -0.129 |
ey | -0.126 |
nam | -0.124 |
ju | -0.117 |
Token | rope |
Token ID | 17182 |
Feature activation | +5.32e+01 |
Pos logit contributions | |
woke | +0.219 |
and | +0.208 |
said | +0.202 |
told | +0.194 |
, | +0.193 |
Neg logit contributions | |
igsaw | -0.304 |
ey | -0.285 |
redd | -0.281 |
rou | -0.270 |
nam | -0.265 |
Token | around |
Token ID | 1088 |
Feature activation | +4.32e+01 |
Pos logit contributions | |
woke | +0.153 |
anas | +0.144 |
listened | +0.136 |
told | +0.131 |
ate | +0.131 |
Neg logit contributions | |
igsaw | -0.223 |
rou | -0.187 |
possible | -0.185 |
rier | -0.184 |
nam | -0.184 |
Token | it |
Token ID | 340 |
Feature activation | +5.72e+01 |
Pos logit contributions | |
lied | +0.181 |
said | +0.178 |
woke | +0.177 |
rang | +0.177 |
owed | +0.174 |
Neg logit contributions | |
igsaw | -0.127 |
nam | -0.115 |
microsc | -0.110 |
ey | -0.108 |
spec | -0.105 |
Token | served |
Token ID | 4983 |
Feature activation | +9.87e+01 |
Pos logit contributions | |
anas | +0.138 |
irus | +0.130 |
woke | +0.128 |
wo | +0.121 |
overheard | +0.120 |
Neg logit contributions | |
igsaw | -0.225 |
prett | -0.212 |
ju | -0.211 |
biggest | -0.208 |
tallest | -0.204 |
Token | as |
Token ID | 355 |
Feature activation | +5.86e+01 |
Pos logit contributions | |
rang | +0.319 |
woke | +0.307 |
checked | +0.298 |
fell | +0.294 |
answered | +0.293 |
Neg logit contributions | |
igsaw | -0.378 |
ju | -0.350 |
nam | -0.350 |
redd | -0.347 |
spec | -0.344 |
Token | a |
Token ID | 257 |
Feature activation | +3.91e+01 |
Pos logit contributions | |
and | +0.239 |
, | +0.223 |
said | +0.187 |
came | +0.186 |
. | +0.184 |
Neg logit contributions | |
igsaw | -0.260 |
ey | -0.244 |
nam | -0.240 |
saw | -0.231 |
redd | -0.229 |
Token | reminder |
Token ID | 15438 |
Feature activation | +8.71e+01 |
Pos logit contributions | |
and | +0.194 |
, | +0.177 |
owed | +0.161 |
came | +0.160 |
said | +0.158 |
Neg logit contributions | |
igsaw | -0.134 |
gol | -0.109 |
ey | -0.108 |
saw | -0.106 |
redd | -0.106 |
Token | to |
Token ID | 284 |
Feature activation | +5.96e+01 |
Pos logit contributions | |
talked | +0.412 |
cried | +0.406 |
argued | +0.405 |
fought | +0.396 |
woke | +0.393 |
Neg logit contributions | |
igsaw | -0.252 |
rou | -0.202 |
possible | -0.182 |
ey | -0.181 |
pill | -0.166 |
Token | her |
Token ID | 607 |
Feature activation | +3.64e+01 |
Pos logit contributions | |
and | +0.225 |
, | +0.207 |
came | +0.188 |
said | +0.187 |
woke | +0.185 |
Neg logit contributions | |
igsaw | -0.252 |
nam | -0.237 |
ey | -0.226 |
Wed | -0.215 |
outer | -0.214 |
Token | wear |
Token ID | 5806 |
Feature activation | +4.85e+01 |
Pos logit contributions | |
and | +0.243 |
, | +0.215 |
ed | +0.212 |
said | +0.208 |
owed | +0.203 |
Neg logit contributions | |
outer | -0.171 |
Wed | -0.159 |
Isa | -0.156 |
igsaw | -0.155 |
ey | -0.153 |
Token | the |
Token ID | 262 |
Feature activation | +4.29e+01 |
Pos logit contributions | |
and | +0.169 |
woke | +0.158 |
, | +0.157 |
came | +0.154 |
went | +0.153 |
Neg logit contributions | |
outer | -0.185 |
igsaw | -0.181 |
nam | -0.179 |
microsc | -0.173 |
ey | -0.169 |
Token | hat |
Token ID | 6877 |
Feature activation | +5.13e+01 |
Pos logit contributions | |
and | +0.161 |
, | +0.154 |
came | +0.126 |
. | +0.124 |
said | +0.122 |
Neg logit contributions | |
ey | -0.188 |
igsaw | -0.184 |
redd | -0.178 |
nam | -0.171 |
saw | -0.166 |
Token | while |
Token ID | 981 |
Feature activation | +7.24e+01 |
Pos logit contributions | |
woke | +0.111 |
checked | +0.110 |
ran | +0.104 |
listened | +0.104 |
squeezed | +0.103 |
Neg logit contributions | |
nam | -0.239 |
igsaw | -0.236 |
possible | -0.230 |
reating | -0.227 |
ifying | -0.223 |
Token | he |
Token ID | 339 |
Feature activation | +4.83e+01 |
Pos logit contributions | |
and | +0.311 |
, | +0.274 |
said | +0.268 |
owed | +0.267 |
agreed | +0.263 |
Neg logit contributions | |
nam | -0.255 |
saw | -0.215 |
rier | -0.211 |
ocol | -0.210 |
outer | -0.207 |
Token | performed |
Token ID | 6157 |
Feature activation | +9.87e+01 |
Pos logit contributions | |
, | +0.147 |
; | +0.137 |
and | +0.123 |
- | +0.118 |
but | +0.117 |
Neg logit contributions | |
pill | -0.193 |
rou | -0.176 |
andon | -0.176 |
Isa | -0.176 |
spec | -0.172 |
Token | his |
Token ID | 465 |
Feature activation | +3.13e+01 |
Pos logit contributions | |
and | +0.290 |
woke | +0.267 |
, | +0.264 |
came | +0.263 |
checked | +0.260 |
Neg logit contributions | |
igsaw | -0.458 |
outer | -0.447 |
nam | -0.435 |
rier | -0.420 |
ey | -0.417 |
Token | tricks |
Token ID | 15910 |
Feature activation | +6.83e+01 |
Pos logit contributions | |
and | +0.154 |
, | +0.147 |
came | +0.144 |
got | +0.135 |
woke | +0.135 |
Neg logit contributions | |
igsaw | -0.087 |
ey | -0.083 |
nam | -0.076 |
Wed | -0.071 |
downward | -0.070 |
Token | . |
Token ID | 13 |
Feature activation | +6.85e+01 |
Pos logit contributions | |
checked | +0.197 |
woke | +0.169 |
walked | +0.166 |
calmed | +0.166 |
died | +0.161 |
Neg logit contributions | |
igsaw | -0.293 |
possible | -0.290 |
reating | -0.287 |
rier | -0.272 |
downward | -0.268 |
Token | He |
Token ID | 679 |
Feature activation | +4.63e+01 |
Pos logit contributions | |
and | +0.338 |
, | +0.324 |
came | +0.299 |
fell | +0.291 |
said | +0.287 |
Neg logit contributions | |
igsaw | -0.218 |
saw | -0.187 |
redd | -0.186 |
pill | -0.183 |
ey | -0.179 |
Token | put |
Token ID | 1234 |
Feature activation | +7.93e+01 |
Pos logit contributions | |
, | +0.125 |
; | +0.110 |
. | +0.108 |
and | +0.108 |
! | +0.103 |
Neg logit contributions | |
saw | -0.184 |
iam | -0.181 |
rou | -0.181 |
gol | -0.178 |
Isa | -0.176 |
Token | dog |
Token ID | 3290 |
Feature activation | +6.14e+01 |
Pos logit contributions | |
reached | +0.141 |
and | +0.138 |
agreed | +0.136 |
squeezed | +0.134 |
, | +0.134 |
Neg logit contributions | |
rou | -0.088 |
ey | -0.086 |
igsaw | -0.084 |
fault | -0.083 |
jan | -0.083 |
Token | , |
Token ID | 11 |
Feature activation | +6.41e+01 |
Pos logit contributions | |
int | +0.191 |
red | +0.187 |
ged | +0.181 |
bed | +0.174 |
examined | +0.169 |
Neg logit contributions | |
concession | -0.175 |
igators | -0.171 |
pill | -0.170 |
Wed | -0.168 |
rewarding | -0.166 |
Token | Spot |
Token ID | 15899 |
Feature activation | +4.57e+01 |
Pos logit contributions | |
hed | +0.245 |
red | +0.221 |
ed | +0.220 |
owed | +0.219 |
fell | +0.216 |
Neg logit contributions | |
nam | -0.220 |
Wed | -0.206 |
concession | -0.193 |
reating | -0.193 |
microsc | -0.186 |
Token | , |
Token ID | 11 |
Feature activation | +6.50e+01 |
Pos logit contributions | |
int | +0.123 |
red | +0.117 |
examined | +0.116 |
squeezed | +0.108 |
shook | +0.106 |
Neg logit contributions | |
concession | -0.148 |
Wed | -0.135 |
living | -0.133 |
chores | -0.129 |
ancer | -0.124 |
Token | had |
Token ID | 550 |
Feature activation | +5.23e+01 |
Pos logit contributions | |
hed | +0.175 |
owed | +0.168 |
red | +0.159 |
replied | +0.151 |
answered | +0.148 |
Neg logit contributions | |
concession | -0.256 |
nam | -0.256 |
Wed | -0.251 |
reating | -0.241 |
iam | -0.220 |
Token | died |
Token ID | 3724 |
Feature activation | +9.86e+01 |
Pos logit contributions | |
and | +0.189 |
, | +0.183 |
owed | +0.157 |
hed | +0.148 |
replied | +0.145 |
Neg logit contributions | |
nam | -0.206 |
Wed | -0.186 |
microsc | -0.186 |
pill | -0.179 |
reating | -0.179 |
Token | . |
Token ID | 13 |
Feature activation | +6.50e+01 |
Pos logit contributions | |
reached | +0.240 |
replied | +0.238 |
ate | +0.227 |
popped | +0.226 |
answered | +0.225 |
Neg logit contributions | |
nam | -0.451 |
rier | -0.422 |
reating | -0.419 |
Wed | -0.408 |
ifying | -0.406 |
Token | He |
Token ID | 679 |
Feature activation | +4.27e+01 |
Pos logit contributions | |
and | +0.284 |
, | +0.270 |
came | +0.238 |
. | +0.225 |
said | +0.223 |
Neg logit contributions | |
igsaw | -0.244 |
ey | -0.233 |
nam | -0.228 |
Deal | -0.219 |
saw | -0.218 |
Token | was |
Token ID | 373 |
Feature activation | +5.39e+01 |
Pos logit contributions | |
, | +0.116 |
and | +0.095 |
red | +0.093 |
; | +0.093 |
owed | +0.092 |
Neg logit contributions | |
Amen | -0.176 |
concession | -0.163 |
iam | -0.150 |
Isa | -0.148 |
wre | -0.148 |
Token | very |
Token ID | 845 |
Feature activation | +4.20e+01 |
Pos logit contributions | |
and | +0.222 |
, | +0.210 |
. | +0.172 |
said | +0.172 |
came | +0.172 |
Neg logit contributions | |
igsaw | -0.224 |
nam | -0.214 |
ey | -0.211 |
saw | -0.200 |
redd | -0.197 |
Token | old |
Token ID | 1468 |
Feature activation | +1.20e+01 |
Pos logit contributions | |
and | +0.210 |
, | +0.197 |
came | +0.170 |
. | +0.168 |
said | +0.165 |
Neg logit contributions | |
igsaw | -0.138 |
nam | -0.131 |
ey | -0.128 |
iqu | -0.121 |
rou | -0.120 |
Token | spread |
Token ID | 4104 |
Feature activation | +7.96e+01 |
Pos logit contributions | |
and | +0.229 |
ops | +0.225 |
, | +0.218 |
woke | +0.214 |
owed | +0.206 |
Neg logit contributions | |
possible | -0.161 |
rewarding | -0.161 |
nam | -0.156 |
redd | -0.144 |
outer | -0.143 |
Token | far |
Token ID | 1290 |
Feature activation | +3.50e+01 |
Pos logit contributions | |
and | +0.243 |
woke | +0.233 |
, | +0.232 |
answered | +0.230 |
said | +0.230 |
Neg logit contributions | |
igsaw | -0.322 |
ey | -0.318 |
outer | -0.300 |
rier | -0.297 |
possible | -0.294 |
Token | and |
Token ID | 290 |
Feature activation | +7.45e+01 |
Pos logit contributions | |
came | +0.098 |
answered | +0.097 |
said | +0.095 |
counted | +0.093 |
woke | +0.092 |
Neg logit contributions | |
rier | -0.152 |
ifying | -0.148 |
igsaw | -0.147 |
ey | -0.147 |
outer | -0.146 |
Token | wide |
Token ID | 3094 |
Feature activation | +1.06e+01 |
Pos logit contributions | |
and | +0.329 |
, | +0.308 |
. | +0.260 |
said | +0.258 |
came | +0.257 |
Neg logit contributions | |
igsaw | -0.309 |
ey | -0.292 |
nam | -0.283 |
saw | -0.272 |
redd | -0.269 |
Token | . |
Token ID | 13 |
Feature activation | +6.63e+01 |
Pos logit contributions | |
said | +0.020 |
got | +0.018 |
and | +0.017 |
came | +0.017 |
answered | +0.017 |
Neg logit contributions | |
igsaw | -0.058 |
rier | -0.057 |
rou | -0.057 |
outer | -0.057 |
ey | -0.057 |
Token | Simple |
Token ID | 17427 |
Feature activation | +9.85e+01 |
Pos logit contributions | |
and | +0.206 |
, | +0.182 |
. | +0.142 |
said | +0.133 |
came | +0.128 |
Neg logit contributions | |
nam | -0.367 |
ey | -0.365 |
igsaw | -0.365 |
redd | -0.352 |
iqu | -0.348 |
Token | acts |
Token ID | 6529 |
Feature activation | +4.75e+01 |
Pos logit contributions | |
and | +0.449 |
said | +0.432 |
, | +0.431 |
fell | +0.417 |
came | +0.412 |
Neg logit contributions | |
igsaw | -0.310 |
ey | -0.295 |
spec | -0.261 |
pill | -0.259 |
rou | -0.259 |
Token | of |
Token ID | 286 |
Feature activation | +2.17e+01 |
Pos logit contributions | |
woke | +0.157 |
and | +0.153 |
came | +0.152 |
sat | +0.150 |
walked | +0.150 |
Neg logit contributions | |
igsaw | -0.171 |
rou | -0.168 |
ifying | -0.162 |
ey | -0.162 |
rier | -0.158 |
Token | kindness |
Token ID | 23887 |
Feature activation | +6.41e+01 |
Pos logit contributions | |
and | +0.099 |
, | +0.094 |
came | +0.090 |
said | +0.088 |
got | +0.087 |
Neg logit contributions | |
igsaw | -0.073 |
ey | -0.071 |
nam | -0.066 |
outer | -0.065 |
ocol | -0.062 |
Token | can |
Token ID | 460 |
Feature activation | +3.96e+01 |
Pos logit contributions | |
sat | +0.195 |
read | +0.191 |
counted | +0.189 |
overheard | +0.187 |
talked | +0.187 |
Neg logit contributions | |
igsaw | -0.203 |
pill | -0.199 |
rou | -0.194 |
possible | -0.193 |
ifying | -0.192 |
Token | often |
Token ID | 1690 |
Feature activation | +4.44e+01 |
Pos logit contributions | |
and | +0.139 |
, | +0.130 |
woke | +0.125 |
ed | +0.125 |
came | +0.123 |
Neg logit contributions | |
possible | -0.144 |
Wal | -0.132 |
outer | -0.126 |
allion | -0.123 |
rier | -0.123 |
Token | Bob |
Token ID | 5811 |
Feature activation | +5.11e+01 |
Pos logit contributions | |
and | +0.225 |
, | +0.204 |
came | +0.182 |
said | +0.176 |
. | +0.174 |
Neg logit contributions | |
igsaw | -0.263 |
nam | -0.260 |
saw | -0.252 |
redd | -0.248 |
ey | -0.244 |
Token | 's |
Token ID | 338 |
Feature activation | +3.37e+01 |
Pos logit contributions | |
owed | +0.143 |
fully | +0.112 |
but | +0.106 |
," | +0.106 |
, | +0.106 |
Neg logit contributions | |
rou | -0.198 |
pill | -0.187 |
Wal | -0.179 |
gol | -0.177 |
spe | -0.165 |
Token | passion |
Token ID | 7506 |
Feature activation | +3.73e+01 |
Pos logit contributions | |
and | +0.159 |
, | +0.148 |
said | +0.142 |
came | +0.141 |
got | +0.134 |
Neg logit contributions | |
igsaw | -0.121 |
ey | -0.109 |
nam | -0.096 |
rou | -0.096 |
gol | -0.096 |
Token | to |
Token ID | 284 |
Feature activation | +5.37e+01 |
Pos logit contributions | |
argued | +0.078 |
anas | +0.076 |
whispered | +0.074 |
irus | +0.073 |
guessed | +0.073 |
Neg logit contributions | |
rou | -0.154 |
ancer | -0.145 |
outer | -0.144 |
igsaw | -0.143 |
possible | -0.138 |
Token | create |
Token ID | 2251 |
Feature activation | +5.64e+01 |
Pos logit contributions | |
and | +0.204 |
, | +0.188 |
said | +0.160 |
came | +0.159 |
. | +0.156 |
Neg logit contributions | |
igsaw | -0.243 |
ey | -0.228 |
nam | -0.219 |
outer | -0.211 |
gol | -0.207 |
Token | consumed |
Token ID | 13529 |
Feature activation | +9.81e+01 |
Pos logit contributions | |
and | +0.234 |
, | +0.216 |
came | +0.216 |
woke | +0.214 |
said | +0.213 |
Neg logit contributions | |
igsaw | -0.202 |
ey | -0.186 |
outer | -0.184 |
nam | -0.178 |
Wed | -0.173 |
Token | him |
Token ID | 683 |
Feature activation | +3.83e+01 |
Pos logit contributions | |
and | +0.347 |
, | +0.328 |
came | +0.304 |
gave | +0.298 |
woke | +0.297 |
Neg logit contributions | |
igsaw | -0.407 |
ey | -0.395 |
nam | -0.389 |
saw | -0.379 |
spec | -0.375 |
Token | , |
Token ID | 11 |
Feature activation | +6.64e+01 |
Pos logit contributions | |
checked | +0.090 |
came | +0.088 |
reached | +0.082 |
answered | +0.080 |
shook | +0.079 |
Neg logit contributions | |
igsaw | -0.195 |
rou | -0.172 |
possible | -0.171 |
outer | -0.165 |
rier | -0.164 |
Token | and |
Token ID | 290 |
Feature activation | +7.45e+01 |
Pos logit contributions | |
owed | +0.177 |
, | +0.157 |
woke | +0.140 |
red | +0.137 |
answered | +0.135 |
Neg logit contributions | |
nam | -0.307 |
ey | -0.296 |
rou | -0.288 |
ju | -0.276 |
igsaw | -0.276 |
Token | he |
Token ID | 339 |
Feature activation | +4.73e+01 |
Pos logit contributions | |
and | +0.279 |
, | +0.252 |
. | +0.216 |
came | +0.215 |
woke | +0.206 |
Neg logit contributions | |
nam | -0.289 |
igsaw | -0.275 |
saw | -0.269 |
redd | -0.260 |
wre | -0.256 |
Token | became |
Token ID | 2627 |
Feature activation | +6.69e+01 |
Pos logit contributions | |
, | +0.125 |
and | +0.121 |
. | +0.098 |
owed | +0.097 |
because | +0.093 |
Neg logit contributions | |
rou | -0.203 |
spe | -0.196 |
gol | -0.182 |
wre | -0.176 |
ubb | -0.175 |
Token | you |
Token ID | 345 |
Feature activation | -1.31e+02 |
Pos logit contributions | |
and | +0.287 |
, | +0.260 |
. | +0.213 |
said | +0.207 |
came | +0.206 |
Neg logit contributions | |
igsaw | -0.407 |
nam | -0.395 |
ey | -0.394 |
saw | -0.375 |
redd | -0.371 |
Token | go |
Token ID | 467 |
Feature activation | -9.86e+01 |
Pos logit contributions | |
possible | +0.475 |
Amen | +0.436 |
nam | +0.431 |
winners | +0.420 |
Deal | +0.416 |
Neg logit contributions | |
and | -0.393 |
checked | -0.358 |
scanned | -0.342 |
shook | -0.327 |
searched | -0.323 |
Token | , |
Token ID | 11 |
Feature activation | -7.47e+01 |
Pos logit contributions | |
saw | +0.396 |
outer | +0.383 |
wre | +0.375 |
concession | +0.370 |
igsaw | +0.370 |
Neg logit contributions | |
answered | -0.286 |
owed | -0.286 |
red | -0.276 |
and | -0.273 |
shook | -0.270 |
Token | my |
Token ID | 616 |
Feature activation | -1.15e+02 |
Pos logit contributions | |
igsaw | +0.268 |
nam | +0.267 |
ey | +0.263 |
redd | +0.262 |
saw | +0.258 |
Neg logit contributions | |
and | -0.263 |
, | -0.254 |
came | -0.244 |
owed | -0.237 |
answered | -0.234 |
Token | d |
Token ID | 288 |
Feature activation | -1.06e+02 |
Pos logit contributions | |
igsaw | +0.357 |
ey | +0.332 |
rou | +0.327 |
gol | +0.310 |
nam | +0.309 |
Neg logit contributions | |
and | -0.521 |
came | -0.497 |
, | -0.488 |
got | -0.463 |
helped | -0.455 |
Token | arl |
Token ID | 7063 |
Feature activation | -2.32e+02 |
Pos logit contributions | |
igsaw | +0.163 |
ey | +0.131 |
rou | +0.110 |
arl | +0.109 |
saw | +0.103 |
Neg logit contributions | |
and | -0.746 |
, | -0.726 |
said | -0.681 |
came | -0.665 |
looked | -0.657 |
Token | ings |
Token ID | 654 |
Feature activation | -6.41e+01 |
Pos logit contributions | |
igsaw | +0.966 |
ey | +0.911 |
rier | +0.894 |
arl | +0.872 |
gol | +0.848 |
Neg logit contributions | |
and | -0.866 |
, | -0.792 |
came | -0.746 |
said | -0.702 |
looked | -0.694 |
Token | . |
Token ID | 13 |
Feature activation | -6.19e+01 |
Pos logit contributions | |
concession | +0.272 |
reating | +0.262 |
arl | +0.246 |
ifying | +0.246 |
Wed | +0.242 |
Neg logit contributions | |
argued | -0.158 |
owed | -0.153 |
squeezed | -0.151 |
touched | -0.150 |
watered | -0.146 |
Token | Enjoy |
Token ID | 18179 |
Feature activation | -8.58e+01 |
Pos logit contributions | |
igsaw | +0.224 |
ey | +0.209 |
nam | +0.200 |
saw | +0.192 |
redd | +0.189 |
Neg logit contributions | |
and | -0.310 |
, | -0.292 |
. | -0.252 |
said | -0.250 |
came | -0.249 |
Token | your |
Token ID | 534 |
Feature activation | -1.01e+02 |
Pos logit contributions | |
igsaw | +0.233 |
ey | +0.229 |
ocol | +0.228 |
Wed | +0.228 |
reating | +0.224 |
Neg logit contributions | |
came | -0.368 |
and | -0.363 |
searched | -0.360 |
fell | -0.358 |
stood | -0.358 |
Token | lunch |
Token ID | 9965 |
Feature activation | -8.01e+01 |
Pos logit contributions | |
ey | +0.292 |
igsaw | +0.285 |
ocol | +0.257 |
nam | +0.252 |
redd | +0.232 |
Neg logit contributions | |
and | -0.490 |
came | -0.458 |
, | -0.443 |
reached | -0.429 |
agreed | -0.428 |
Token | 's |
Token ID | 338 |
Feature activation | -1.05e+02 |
Pos logit contributions | |
redd | +0.352 |
igsaw | +0.344 |
ju | +0.334 |
ey | +0.323 |
rier | +0.322 |
Neg logit contributions | |
and | -0.271 |
came | -0.251 |
owed | -0.246 |
entered | -0.244 |
followed | -0.236 |
Token | wonderful |
Token ID | 7932 |
Feature activation | -9.81e+01 |
Pos logit contributions | |
nam | +0.366 |
igsaw | +0.343 |
redd | +0.325 |
Deal | +0.322 |
ey | +0.319 |
Neg logit contributions | |
and | -0.475 |
came | -0.441 |
, | -0.433 |
woke | -0.410 |
got | -0.399 |
Token | , |
Token ID | 11 |
Feature activation | -6.77e+01 |
Pos logit contributions | |
igsaw | +0.632 |
ey | +0.597 |
redd | +0.571 |
nam | +0.571 |
rier | +0.562 |
Neg logit contributions | |
and | -0.146 |
came | -0.126 |
got | -0.109 |
woke | -0.105 |
fell | -0.105 |
Token | my |
Token ID | 616 |
Feature activation | -1.09e+02 |
Pos logit contributions | |
igsaw | +0.244 |
redd | +0.215 |
ey | +0.213 |
nam | +0.212 |
ju | +0.201 |
Neg logit contributions | |
and | -0.295 |
, | -0.278 |
came | -0.269 |
woke | -0.260 |
got | -0.258 |
Token | d |
Token ID | 288 |
Feature activation | -1.01e+02 |
Pos logit contributions | |
igsaw | +0.329 |
rou | +0.298 |
ey | +0.293 |
nam | +0.287 |
rier | +0.275 |
Neg logit contributions | |
and | -0.525 |
came | -0.523 |
woke | -0.485 |
got | -0.483 |
, | -0.483 |
Token | arl |
Token ID | 7063 |
Feature activation | -2.27e+02 |
Pos logit contributions | |
igsaw | +0.192 |
ey | +0.161 |
rou | +0.132 |
pill | +0.129 |
saw | +0.128 |
Neg logit contributions | |
and | -0.690 |
, | -0.667 |
said | -0.613 |
came | -0.604 |
. | -0.594 |
Token | ings |
Token ID | 654 |
Feature activation | -5.99e+01 |
Pos logit contributions | |
igsaw | +0.885 |
ey | +0.835 |
rier | +0.789 |
arl | +0.775 |
gol | +0.763 |
Neg logit contributions | |
and | -0.918 |
, | -0.848 |
came | -0.798 |
woke | -0.758 |
said | -0.757 |
Token | !" |
Token ID | 2474 |
Feature activation | -5.37e+01 |
Pos logit contributions | |
ifying | +0.242 |
igsaw | +0.240 |
arl | +0.239 |
concession | +0.230 |
pill | +0.220 |
Neg logit contributions | |
owed | -0.149 |
counted | -0.143 |
argued | -0.141 |
imagined | -0.140 |
searched | -0.138 |
Token | Mom |
Token ID | 11254 |
Feature activation | -6.94e+01 |
Pos logit contributions | |
igsaw | +0.191 |
nam | +0.190 |
pill | +0.180 |
gol | +0.176 |
Wed | +0.174 |
Neg logit contributions | |
and | -0.182 |
owed | -0.172 |
, | -0.168 |
listened | -0.160 |
woke | -0.160 |
Token | my |
Token ID | 1820 |
Feature activation | -1.15e+02 |
Pos logit contributions | |
ey | +0.287 |
rou | +0.287 |
saw | +0.284 |
Amen | +0.273 |
pill | +0.273 |
Neg logit contributions | |
and | -0.190 |
, | -0.189 |
fell | -0.165 |
grew | -0.158 |
because | -0.154 |
Token | says |
Token ID | 1139 |
Feature activation | -5.16e+01 |
Pos logit contributions | |
pill | +0.518 |
rou | +0.491 |
saw | +0.485 |
spec | +0.483 |
ey | +0.480 |
Neg logit contributions | |
and | -0.219 |
, | -0.202 |
because | -0.193 |
grew | -0.185 |
red | -0.183 |
Token | Everyday |
Token ID | 48154 |
Feature activation | -1.12e+02 |
Pos logit contributions | |
and | +0.148 |
, | +0.139 |
anas | +0.131 |
owed | +0.130 |
red | +0.124 |
Neg logit contributions | |
nam | -0.186 |
rou | -0.182 |
gol | -0.165 |
Ag | -0.162 |
Wed | -0.161 |
Token | he |
Token ID | 339 |
Feature activation | -1.00e+02 |
Pos logit contributions | |
Wed | +0.417 |
igsaw | +0.382 |
pill | +0.349 |
outer | +0.349 |
Tues | +0.347 |
Neg logit contributions | |
and | -0.377 |
owed | -0.361 |
squeezed | -0.351 |
hugged | -0.341 |
checked | -0.340 |
Token | would |
Token ID | 561 |
Feature activation | -8.97e+01 |
Pos logit contributions | |
Amen | +0.357 |
saw | +0.345 |
brav | +0.337 |
spec | +0.336 |
concession | +0.336 |
Neg logit contributions | |
and | -0.258 |
, | -0.240 |
red | -0.219 |
irus | -0.206 |
because | -0.203 |
Token | take |
Token ID | 1011 |
Feature activation | -8.72e+01 |
Pos logit contributions | |
Amen | +0.305 |
outer | +0.261 |
Wed | +0.255 |
nam | +0.242 |
igsaw | +0.241 |
Neg logit contributions | |
ed | -0.333 |
and | -0.303 |
red | -0.289 |
ged | -0.287 |
owed | -0.287 |
Token | the |
Token ID | 262 |
Feature activation | -9.17e+01 |
Pos logit contributions | |
igsaw | +0.364 |
outer | +0.320 |
Wed | +0.319 |
nam | +0.310 |
ocol | +0.298 |
Neg logit contributions | |
and | -0.335 |
, | -0.300 |
said | -0.290 |
came | -0.280 |
gave | -0.277 |
Token | av |
Token ID | 1196 |
Feature activation | -2.22e+02 |
Pos logit contributions | |
igsaw | +0.323 |
ey | +0.300 |
nam | +0.295 |
redd | +0.283 |
pill | +0.266 |
Neg logit contributions | |
and | -0.441 |
, | -0.420 |
came | -0.383 |
said | -0.373 |
got | -0.366 |
Token | oc |
Token ID | 420 |
Feature activation | -1.98e+02 |
Pos logit contributions | |
igsaw | +1.089 |
ey | +1.086 |
nam | +1.041 |
iqu | +1.031 |
saw | +0.996 |
Neg logit contributions | |
and | -0.853 |
, | -0.781 |
. | -0.615 |
said | -0.577 |
came | -0.571 |
Token | ados |
Token ID | 22484 |
Feature activation | -9.85e+01 |
Pos logit contributions | |
igsaw | +0.653 |
ey | +0.603 |
nam | +0.568 |
pill | +0.545 |
saw | +0.543 |
Neg logit contributions | |
and | -1.043 |
, | -0.984 |
said | -0.876 |
came | -0.875 |
. | -0.864 |
Token | and |
Token ID | 290 |
Feature activation | -5.08e+01 |
Pos logit contributions | |
igsaw | +0.437 |
nam | +0.428 |
possible | +0.410 |
ju | +0.394 |
reating | +0.387 |
Neg logit contributions | |
woke | -0.287 |
came | -0.264 |
fell | -0.258 |
ran | -0.257 |
checked | -0.257 |
Token | scatter |
Token ID | 41058 |
Feature activation | -6.02e+01 |
Pos logit contributions | |
nam | +0.217 |
igsaw | +0.201 |
redd | +0.198 |
ju | +0.191 |
ey | +0.188 |
Neg logit contributions | |
and | -0.193 |
, | -0.174 |
owed | -0.152 |
. | -0.150 |
came | -0.150 |
Token | them |
Token ID | 606 |
Feature activation | -9.22e+01 |
Pos logit contributions | |
igsaw | +0.239 |
redd | +0.219 |
nam | +0.214 |
ju | +0.210 |
ey | +0.208 |
Neg logit contributions | |
and | -0.224 |
woke | -0.212 |
owed | -0.210 |
, | -0.208 |
listened | -0.207 |
Token | reward |
Token ID | 6721 |
Feature activation | -7.58e+01 |
Pos logit contributions | |
igsaw | +0.550 |
ey | +0.548 |
spec | +0.499 |
nam | +0.491 |
redd | +0.467 |
Neg logit contributions | |
and | -0.551 |
listened | -0.524 |
said | -0.514 |
came | -0.513 |
, | -0.511 |
Token | . |
Token ID | 13 |
Feature activation | -7.61e+01 |
Pos logit contributions | |
possible | +0.338 |
igsaw | +0.321 |
reating | +0.320 |
nam | +0.316 |
ocol | +0.301 |
Neg logit contributions | |
woke | -0.188 |
turned | -0.173 |
checked | -0.169 |
fell | -0.168 |
squeezed | -0.166 |
Token | It |
Token ID | 632 |
Feature activation | -8.45e+01 |
Pos logit contributions | |
igsaw | +0.285 |
pill | +0.253 |
nam | +0.251 |
saw | +0.247 |
Amen | +0.246 |
Neg logit contributions | |
and | -0.332 |
, | -0.312 |
came | -0.285 |
got | -0.278 |
listened | -0.276 |
Token | was |
Token ID | 373 |
Feature activation | -8.11e+01 |
Pos logit contributions | |
pill | +0.354 |
redd | +0.345 |
ju | +0.344 |
spec | +0.328 |
Looks | +0.319 |
Neg logit contributions | |
and | -0.237 |
, | -0.229 |
ged | -0.182 |
. | -0.175 |
red | -0.174 |
Token | ch |
Token ID | 442 |
Feature activation | -1.01e+02 |
Pos logit contributions | |
nam | +0.276 |
igsaw | +0.267 |
redd | +0.264 |
possible | +0.258 |
outer | +0.258 |
Neg logit contributions | |
and | -0.329 |
woke | -0.305 |
, | -0.305 |
owed | -0.286 |
came | -0.282 |
Token | ocol |
Token ID | 4668 |
Feature activation | -2.19e+02 |
Pos logit contributions | |
igsaw | +0.216 |
ey | +0.184 |
pill | +0.157 |
rou | +0.155 |
ocol | +0.153 |
Neg logit contributions | |
and | -0.662 |
, | -0.635 |
said | -0.593 |
came | -0.583 |
. | -0.572 |
Token | ates |
Token ID | 689 |
Feature activation | -7.42e+01 |
Pos logit contributions | |
igsaw | +0.638 |
ey | +0.622 |
ocol | +0.587 |
pill | +0.564 |
nam | +0.563 |
Neg logit contributions | |
and | -1.191 |
, | -1.129 |
said | -1.031 |
came | -1.001 |
. | -0.984 |
Token | and |
Token ID | 290 |
Feature activation | -5.08e+01 |
Pos logit contributions | |
ifying | +0.390 |
rou | +0.361 |
rier | +0.351 |
igsaw | +0.347 |
possible | +0.342 |
Neg logit contributions | |
checked | -0.171 |
woke | -0.146 |
promised | -0.140 |
walked | -0.140 |
came | -0.139 |
Token | sweets |
Token ID | 42402 |
Feature activation | -8.51e+01 |
Pos logit contributions | |
nam | +0.225 |
igsaw | +0.191 |
pill | +0.187 |
spec | +0.187 |
redd | +0.186 |
Neg logit contributions | |
and | -0.176 |
, | -0.155 |
owed | -0.141 |
came | -0.130 |
woke | -0.130 |
Token | ! |
Token ID | 0 |
Feature activation | -4.54e+01 |
Pos logit contributions | |
ju | +0.380 |
possible | +0.368 |
ifying | +0.363 |
rier | +0.359 |
spec | +0.357 |
Neg logit contributions | |
checked | -0.168 |
cried | -0.163 |
squeezed | -0.154 |
tapped | -0.152 |
answered | -0.147 |
Token |
|
Token ID | 198 |
Feature activation | -8.34e+01 |
Pos logit contributions | |
igsaw | +0.176 |
pill | +0.159 |
nam | +0.155 |
saw | +0.153 |
ey | +0.152 |
Neg logit contributions | |
and | -0.197 |
, | -0.189 |
came | -0.169 |
got | -0.164 |
woke | -0.161 |
Token | When |
Token ID | 1649 |
Feature activation | -7.66e+01 |
Pos logit contributions | |
igsaw | +0.289 |
ey | +0.269 |
saw | +0.266 |
pill | +0.265 |
nam | +0.261 |
Neg logit contributions | |
and | -0.358 |
, | -0.334 |
came | -0.303 |
got | -0.300 |
listened | -0.298 |
Token | Daisy |
Token ID | 40355 |
Feature activation | -1.02e+02 |
Pos logit contributions | |
igsaw | +0.272 |
nam | +0.261 |
Wed | +0.248 |
outer | +0.234 |
concession | +0.234 |
Neg logit contributions | |
and | -0.336 |
, | -0.278 |
owed | -0.268 |
. | -0.250 |
red | -0.248 |
Token | put |
Token ID | 1234 |
Feature activation | -5.93e+01 |
Pos logit contributions | |
pill | +0.407 |
Isa | +0.364 |
concession | +0.353 |
acy | +0.348 |
venth | +0.347 |
Neg logit contributions | |
anas | -0.237 |
eness | -0.228 |
fully | -0.216 |
than | -0.214 |
because | -0.210 |
Token | the |
Token ID | 262 |
Feature activation | -8.90e+01 |
Pos logit contributions | |
igsaw | +0.261 |
ey | +0.252 |
nam | +0.251 |
redd | +0.231 |
ocol | +0.230 |
Neg logit contributions | |
and | -0.196 |
woke | -0.177 |
, | -0.172 |
listened | -0.166 |
came | -0.165 |
Token | magn |
Token ID | 7842 |
Feature activation | -9.33e+01 |
Pos logit contributions | |
igsaw | +0.311 |
ey | +0.310 |
nam | +0.296 |
redd | +0.284 |
saw | +0.260 |
Neg logit contributions | |
and | -0.421 |
, | -0.401 |
came | -0.366 |
said | -0.355 |
got | -0.344 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.15e+02 |
Pos logit contributions | |
igsaw | +0.078 |
ifying | +0.077 |
ey | +0.040 |
saw | +0.024 |
nam | +0.020 |
Neg logit contributions | |
and | -0.699 |
, | -0.682 |
said | -0.644 |
came | -0.633 |
told | -0.629 |
Token | glass |
Token ID | 5405 |
Feature activation | -7.84e+01 |
Pos logit contributions | |
igsaw | +0.831 |
ey | +0.779 |
nam | +0.732 |
saw | +0.710 |
redd | +0.708 |
Neg logit contributions | |
and | -1.012 |
, | -0.949 |
. | -0.814 |
came | -0.814 |
said | -0.813 |
Token | down |
Token ID | 866 |
Feature activation | -8.38e+01 |
Pos logit contributions | |
igsaw | +0.473 |
ey | +0.418 |
pill | +0.390 |
ifying | +0.389 |
ju | +0.386 |
Neg logit contributions | |
listened | -0.132 |
talked | -0.112 |
told | -0.110 |
promised | -0.108 |
watched | -0.108 |
Token | , |
Token ID | 11 |
Feature activation | -5.46e+01 |
Pos logit contributions | |
igsaw | +0.433 |
ey | +0.421 |
nam | +0.381 |
redd | +0.375 |
ocol | +0.372 |
Neg logit contributions | |
anas | -0.176 |
talked | -0.171 |
listened | -0.169 |
woke | -0.161 |
fought | -0.154 |
Token | the |
Token ID | 262 |
Feature activation | -7.68e+01 |
Pos logit contributions | |
igsaw | +0.244 |
nam | +0.229 |
ey | +0.228 |
redd | +0.217 |
saw | +0.216 |
Neg logit contributions | |
and | -0.215 |
, | -0.196 |
. | -0.165 |
said | -0.164 |
came | -0.164 |
Token | cater |
Token ID | 20825 |
Feature activation | -9.46e+01 |
Pos logit contributions | |
ey | +0.258 |
igsaw | +0.256 |
nam | +0.241 |
spec | +0.220 |
redd | +0.214 |
Neg logit contributions | |
and | -0.363 |
, | -0.343 |
came | -0.310 |
got | -0.298 |
agreed | -0.298 |
Token | . |
Token ID | 13 |
Feature activation | -6.91e+01 |
Pos logit contributions | |
rou | +0.509 |
nam | +0.468 |
ric | +0.461 |
saw | +0.450 |
Acc | +0.449 |
Neg logit contributions | |
shook | -0.178 |
scanned | -0.163 |
popped | -0.163 |
answered | -0.161 |
, | -0.154 |
Token | She |
Token ID | 1375 |
Feature activation | -7.86e+01 |
Pos logit contributions | |
igsaw | +0.313 |
ey | +0.298 |
nam | +0.287 |
saw | +0.278 |
redd | +0.276 |
Neg logit contributions | |
and | -0.281 |
, | -0.262 |
. | -0.217 |
said | -0.214 |
came | -0.213 |
Token | loved |
Token ID | 6151 |
Feature activation | -5.55e+01 |
Pos logit contributions | |
rou | +0.252 |
Acc | +0.223 |
pill | +0.219 |
Ag | +0.211 |
Amen | +0.209 |
Neg logit contributions | |
and | -0.248 |
owed | -0.241 |
red | -0.240 |
, | -0.239 |
eness | -0.235 |
Token | to |
Token ID | 284 |
Feature activation | -7.14e+01 |
Pos logit contributions | |
igsaw | +0.318 |
ey | +0.307 |
nam | +0.300 |
saw | +0.292 |
pill | +0.292 |
Neg logit contributions | |
and | -0.152 |
, | -0.139 |
said | -0.103 |
. | -0.103 |
came | -0.100 |
Token | eat |
Token ID | 4483 |
Feature activation | -7.65e+01 |
Pos logit contributions | |
ey | +0.278 |
igsaw | +0.278 |
outer | +0.259 |
nam | +0.258 |
Amen | +0.253 |
Neg logit contributions | |
and | -0.269 |
, | -0.244 |
ed | -0.233 |
said | -0.229 |
came | -0.219 |
Token | av |
Token ID | 1196 |
Feature activation | -2.14e+02 |
Pos logit contributions | |
ey | +0.218 |
nam | +0.214 |
redd | +0.212 |
pill | +0.209 |
igsaw | +0.206 |
Neg logit contributions | |
and | -0.348 |
, | -0.332 |
came | -0.331 |
fell | -0.328 |
got | -0.326 |
Token | oc |
Token ID | 420 |
Feature activation | -1.90e+02 |
Pos logit contributions | |
igsaw | +1.115 |
ey | +1.084 |
iqu | +1.061 |
nam | +1.051 |
saw | +1.017 |
Neg logit contributions | |
and | -0.822 |
, | -0.753 |
. | -0.583 |
said | -0.539 |
came | -0.526 |
Token | ados |
Token ID | 22484 |
Feature activation | -9.08e+01 |
Pos logit contributions | |
igsaw | +0.607 |
ey | +0.562 |
nam | +0.524 |
pill | +0.520 |
saw | +0.502 |
Neg logit contributions | |
and | -1.022 |
, | -0.960 |
said | -0.852 |
. | -0.850 |
came | -0.850 |
Token | . |
Token ID | 13 |
Feature activation | -5.26e+01 |
Pos logit contributions | |
nam | +0.461 |
igsaw | +0.441 |
pill | +0.436 |
ju | +0.428 |
Wed | +0.420 |
Neg logit contributions | |
woke | -0.211 |
fell | -0.191 |
came | -0.190 |
ran | -0.184 |
checked | -0.181 |
Token | One |
Token ID | 1881 |
Feature activation | -6.73e+01 |
Pos logit contributions | |
igsaw | +0.252 |
ey | +0.240 |
nam | +0.232 |
saw | +0.225 |
redd | +0.223 |
Neg logit contributions | |
and | -0.201 |
, | -0.186 |
. | -0.152 |
said | -0.151 |
came | -0.150 |
Token | day |
Token ID | 1110 |
Feature activation | -6.21e+01 |
Pos logit contributions | |
igsaw | +0.256 |
ey | +0.236 |
nam | +0.227 |
redd | +0.225 |
saw | +0.223 |
Neg logit contributions | |
and | -0.307 |
, | -0.288 |
said | -0.250 |
came | -0.248 |
. | -0.245 |
Token | . |
Token ID | 13 |
Feature activation | -6.42e+01 |
Pos logit contributions | |
rou | +0.474 |
nam | +0.447 |
Acc | +0.428 |
Amen | +0.427 |
ric | +0.414 |
Neg logit contributions | |
shook | -0.181 |
popped | -0.170 |
red | -0.168 |
irus | -0.167 |
and | -0.165 |
Token | She |
Token ID | 1375 |
Feature activation | -7.41e+01 |
Pos logit contributions | |
igsaw | +0.300 |
ey | +0.287 |
nam | +0.276 |
saw | +0.268 |
redd | +0.266 |
Neg logit contributions | |
and | -0.255 |
, | -0.237 |
. | -0.196 |
said | -0.190 |
came | -0.189 |
Token | loved |
Token ID | 6151 |
Feature activation | -5.12e+01 |
Pos logit contributions | |
rou | +0.203 |
Acc | +0.186 |
Amen | +0.172 |
Ag | +0.171 |
Wed | +0.170 |
Neg logit contributions | |
eness | -0.270 |
owed | -0.242 |
, | -0.239 |
and | -0.238 |
irus | -0.238 |
Token | to |
Token ID | 284 |
Feature activation | -6.73e+01 |
Pos logit contributions | |
igsaw | +0.300 |
ey | +0.289 |
nam | +0.283 |
saw | +0.275 |
pill | +0.274 |
Neg logit contributions | |
and | -0.136 |
, | -0.123 |
. | -0.090 |
said | -0.089 |
came | -0.088 |
Token | eat |
Token ID | 4483 |
Feature activation | -7.26e+01 |
Pos logit contributions | |
igsaw | +0.248 |
ey | +0.245 |
outer | +0.229 |
Wed | +0.223 |
Amen | +0.223 |
Neg logit contributions | |
and | -0.262 |
ed | -0.249 |
, | -0.235 |
said | -0.231 |
came | -0.219 |
Token | av |
Token ID | 1196 |
Feature activation | -2.10e+02 |
Pos logit contributions | |
ey | +0.210 |
nam | +0.206 |
redd | +0.204 |
igsaw | +0.204 |
pill | +0.203 |
Neg logit contributions | |
and | -0.328 |
, | -0.311 |
came | -0.310 |
fell | -0.306 |
got | -0.305 |
Token | oc |
Token ID | 420 |
Feature activation | -1.87e+02 |
Pos logit contributions | |
igsaw | +1.097 |
ey | +1.067 |
iqu | +1.046 |
nam | +1.039 |
rier | +1.005 |
Neg logit contributions | |
and | -0.812 |
, | -0.747 |
. | -0.576 |
said | -0.536 |
came | -0.518 |
Token | ados |
Token ID | 22484 |
Feature activation | -8.75e+01 |
Pos logit contributions | |
igsaw | +0.599 |
ey | +0.555 |
nam | +0.519 |
pill | +0.512 |
saw | +0.496 |
Neg logit contributions | |
and | -1.002 |
, | -0.941 |
said | -0.833 |
. | -0.832 |
came | -0.831 |
Token | because |
Token ID | 780 |
Feature activation | -3.95e+01 |
Pos logit contributions | |
nam | +0.442 |
igsaw | +0.424 |
pill | +0.416 |
ju | +0.409 |
Wed | +0.401 |
Neg logit contributions | |
woke | -0.207 |
fell | -0.184 |
came | -0.184 |
ran | -0.179 |
checked | -0.175 |
Token | they |
Token ID | 484 |
Feature activation | -6.86e+01 |
Pos logit contributions | |
igsaw | +0.224 |
ey | +0.221 |
iqu | +0.205 |
rou | +0.205 |
Deal | +0.201 |
Neg logit contributions | |
and | -0.126 |
, | -0.115 |
. | -0.088 |
came | -0.086 |
said | -0.084 |
Token | were |
Token ID | 547 |
Feature activation | -6.13e+01 |
Pos logit contributions | |
nam | +0.352 |
redd | +0.321 |
ju | +0.312 |
pill | +0.304 |
ey | +0.294 |
Neg logit contributions | |
and | -0.198 |
, | -0.178 |
. | -0.139 |
owed | -0.134 |
again | -0.133 |
Token | . |
Token ID | 13 |
Feature activation | -6.19e+01 |
Pos logit contributions | |
rou | +0.472 |
Amen | +0.427 |
ini | +0.425 |
rier | +0.393 |
redd | +0.391 |
Neg logit contributions | |
argued | -0.189 |
scanned | -0.187 |
entered | -0.186 |
approached | -0.186 |
shook | -0.181 |
Token | Did |
Token ID | 7731 |
Feature activation | -7.66e+01 |
Pos logit contributions | |
igsaw | +0.234 |
ey | +0.220 |
nam | +0.212 |
saw | +0.203 |
redd | +0.200 |
Neg logit contributions | |
and | -0.299 |
, | -0.282 |
. | -0.241 |
said | -0.239 |
came | -0.238 |
Token | you |
Token ID | 345 |
Feature activation | -9.33e+01 |
Pos logit contributions | |
igsaw | +0.281 |
ey | +0.263 |
nam | +0.250 |
saw | +0.242 |
redd | +0.239 |
Neg logit contributions | |
and | -0.378 |
, | -0.358 |
. | -0.308 |
said | -0.306 |
came | -0.303 |
Token | know |
Token ID | 760 |
Feature activation | -6.52e+01 |
Pos logit contributions | |
igsaw | +0.298 |
nam | +0.289 |
saw | +0.286 |
rou | +0.275 |
ifying | +0.273 |
Neg logit contributions | |
and | -0.347 |
, | -0.315 |
ed | -0.301 |
came | -0.293 |
checked | -0.287 |
Token | that |
Token ID | 326 |
Feature activation | -6.55e+01 |
Pos logit contributions | |
nam | +0.241 |
reating | +0.221 |
outer | +0.211 |
saw | +0.204 |
microsc | +0.197 |
Neg logit contributions | |
and | -0.263 |
went | -0.244 |
came | -0.242 |
woke | -0.241 |
got | -0.241 |
Token | av |
Token ID | 1196 |
Feature activation | -2.09e+02 |
Pos logit contributions | |
nam | +0.210 |
igsaw | +0.187 |
Wed | +0.186 |
possible | +0.182 |
redd | +0.182 |
Neg logit contributions | |
and | -0.298 |
, | -0.265 |
owed | -0.258 |
red | -0.252 |
came | -0.245 |
Token | oc |
Token ID | 420 |
Feature activation | -1.85e+02 |
Pos logit contributions | |
igsaw | +0.938 |
ey | +0.890 |
nam | +0.858 |
saw | +0.834 |
redd | +0.818 |
Neg logit contributions | |
and | -0.864 |
, | -0.805 |
. | -0.668 |
said | -0.662 |
came | -0.657 |
Token | ados |
Token ID | 22484 |
Feature activation | -8.59e+01 |
Pos logit contributions | |
igsaw | +0.530 |
ey | +0.482 |
pill | +0.454 |
umm | +0.453 |
nam | +0.445 |
Neg logit contributions | |
and | -1.037 |
, | -0.968 |
said | -0.885 |
came | -0.880 |
. | -0.868 |
Token | are |
Token ID | 389 |
Feature activation | -8.13e+01 |
Pos logit contributions | |
igsaw | +0.238 |
igator | +0.238 |
possible | +0.236 |
ey | +0.229 |
ju | +0.228 |
Neg logit contributions | |
ged | -0.263 |
and | -0.257 |
walked | -0.248 |
red | -0.247 |
approached | -0.243 |
Token | very |
Token ID | 845 |
Feature activation | -7.09e+01 |
Pos logit contributions | |
nam | +0.253 |
possible | +0.240 |
redd | +0.239 |
ju | +0.233 |
igsaw | +0.231 |
Neg logit contributions | |
and | -0.361 |
woke | -0.343 |
owed | -0.334 |
, | -0.332 |
hed | -0.330 |
Token | healthy |
Token ID | 5448 |
Feature activation | -8.54e+01 |
Pos logit contributions | |
igsaw | +0.218 |
nam | +0.197 |
ey | +0.195 |
redd | +0.190 |
saw | +0.184 |
Neg logit contributions | |
and | -0.382 |
, | -0.358 |
came | -0.320 |
said | -0.314 |
. | -0.311 |
Token | them |
Token ID | 606 |
Feature activation | -9.05e+01 |
Pos logit contributions | |
nam | +0.245 |
ocol | +0.220 |
outer | +0.207 |
microsc | +0.207 |
Wed | +0.197 |
Neg logit contributions | |
and | -0.275 |
woke | -0.264 |
came | -0.264 |
got | -0.263 |
shook | -0.259 |
Token | and |
Token ID | 290 |
Feature activation | -4.36e+01 |
Pos logit contributions | |
nam | +0.410 |
possible | +0.368 |
rou | +0.359 |
igsaw | +0.350 |
ju | +0.343 |
Neg logit contributions | |
woke | -0.258 |
came | -0.255 |
reached | -0.251 |
shook | -0.243 |
checked | -0.238 |
Token | never |
Token ID | 1239 |
Feature activation | -5.43e+01 |
Pos logit contributions | |
nam | +0.193 |
ey | +0.180 |
igsaw | +0.170 |
redd | +0.165 |
microsc | +0.165 |
Neg logit contributions | |
and | -0.146 |
, | -0.140 |
came | -0.115 |
. | -0.114 |
! | -0.111 |
Token | threw |
Token ID | 9617 |
Feature activation | -3.71e+01 |
Pos logit contributions | |
nam | +0.243 |
igsaw | +0.221 |
ey | +0.221 |
redd | +0.216 |
ju | +0.214 |
Neg logit contributions | |
and | -0.180 |
, | -0.148 |
es | -0.114 |
hed | -0.111 |
owed | -0.110 |
Token | a |
Token ID | 257 |
Feature activation | -7.70e+01 |
Pos logit contributions | |
nam | +0.122 |
redd | +0.116 |
igsaw | +0.114 |
ocol | +0.113 |
ju | +0.107 |
Neg logit contributions | |
and | -0.146 |
owed | -0.146 |
answered | -0.145 |
rang | -0.145 |
said | -0.143 |
Token | tant |
Token ID | 24246 |
Feature activation | -2.07e+02 |
Pos logit contributions | |
igsaw | +0.285 |
ey | +0.266 |
nam | +0.251 |
redd | +0.248 |
saw | +0.245 |
Neg logit contributions | |
and | -0.362 |
, | -0.343 |
said | -0.301 |
came | -0.297 |
. | -0.294 |
Token | rum |
Token ID | 6582 |
Feature activation | -6.38e+01 |
Pos logit contributions | |
igsaw | +0.949 |
ey | +0.902 |
nam | +0.870 |
saw | +0.845 |
redd | +0.836 |
Neg logit contributions | |
and | -0.827 |
, | -0.769 |
. | -0.636 |
said | -0.630 |
came | -0.626 |
Token | again |
Token ID | 757 |
Feature activation | -5.16e+01 |
Pos logit contributions | |
rier | +0.345 |
rou | +0.341 |
igsaw | +0.331 |
iring | +0.323 |
ifying | +0.322 |
Neg logit contributions | |
and | -0.100 |
checked | -0.095 |
answered | -0.095 |
said | -0.091 |
woke | -0.090 |
Token | . |
Token ID | 13 |
Feature activation | -4.24e+01 |
Pos logit contributions | |
possible | +0.205 |
igsaw | +0.201 |
Wed | +0.182 |
nam | +0.181 |
outer | +0.181 |
Neg logit contributions | |
woke | -0.135 |
came | -0.132 |
sat | -0.130 |
entered | -0.129 |
reached | -0.129 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -4.45e+01 |
Pos logit contributions | |
igsaw | +0.229 |
ey | +0.221 |
nam | +0.214 |
saw | +0.208 |
redd | +0.207 |
Neg logit contributions | |
and | -0.143 |
, | -0.129 |
. | -0.104 |
said | -0.102 |
came | -0.096 |
Token | Once |
Token ID | 7454 |
Feature activation | -5.37e+01 |
Pos logit contributions | |
igsaw | +0.221 |
ey | +0.210 |
saw | +0.199 |
nam | +0.198 |
pill | +0.197 |
Neg logit contributions | |
and | -0.153 |
, | -0.141 |
said | -0.114 |
came | -0.113 |
. | -0.110 |
Token | and |
Token ID | 290 |
Feature activation | -6.32e+01 |
Pos logit contributions | |
possible | +0.395 |
dining | +0.391 |
rou | +0.386 |
igsaw | +0.385 |
outer | +0.381 |
Neg logit contributions | |
shook | -0.181 |
came | -0.169 |
argued | -0.163 |
spoke | -0.158 |
squeezed | -0.158 |
Token | play |
Token ID | 711 |
Feature activation | -6.86e+01 |
Pos logit contributions | |
nam | +0.296 |
igsaw | +0.282 |
ey | +0.274 |
Amen | +0.273 |
concession | +0.272 |
Neg logit contributions | |
and | -0.204 |
, | -0.183 |
. | -0.149 |
came | -0.140 |
! | -0.133 |
Token | with |
Token ID | 351 |
Feature activation | -6.92e+01 |
Pos logit contributions | |
igsaw | +0.277 |
Wed | +0.272 |
outer | +0.271 |
rier | +0.262 |
concession | +0.261 |
Neg logit contributions | |
answered | -0.214 |
said | -0.197 |
shook | -0.192 |
rang | -0.187 |
whispered | -0.186 |
Token | your |
Token ID | 534 |
Feature activation | -1.07e+02 |
Pos logit contributions | |
igsaw | +0.263 |
nam | +0.251 |
ey | +0.245 |
outer | +0.235 |
Wed | +0.235 |
Neg logit contributions | |
and | -0.284 |
, | -0.261 |
came | -0.246 |
said | -0.244 |
got | -0.239 |
Token | or |
Token ID | 393 |
Feature activation | -6.80e+01 |
Pos logit contributions | |
igsaw | +0.395 |
ey | +0.381 |
nam | +0.369 |
redd | +0.333 |
microsc | +0.332 |
Neg logit contributions | |
and | -0.492 |
, | -0.469 |
came | -0.423 |
said | -0.408 |
got | -0.402 |
Token | nam |
Token ID | 7402 |
Feature activation | -2.06e+02 |
Pos logit contributions | |
igsaw | +0.275 |
nam | +0.265 |
ey | +0.264 |
saw | +0.245 |
redd | +0.243 |
Neg logit contributions | |
and | -0.299 |
, | -0.280 |
. | -0.237 |
said | -0.235 |
came | -0.235 |
Token | ents |
Token ID | 658 |
Feature activation | -1.11e+02 |
Pos logit contributions | |
ey | +1.069 |
igsaw | +1.055 |
downward | +1.026 |
Deal | +1.025 |
narrative | +0.998 |
Neg logit contributions | |
and | -0.772 |
, | -0.648 |
. | -0.568 |
said | -0.519 |
got | -0.508 |
Token | . |
Token ID | 13 |
Feature activation | -5.60e+01 |
Pos logit contributions | |
reating | +0.522 |
possible | +0.515 |
nam | +0.500 |
igsaw | +0.500 |
ifying | +0.494 |
Neg logit contributions | |
checked | -0.217 |
replied | -0.212 |
shook | -0.204 |
squeezed | -0.203 |
answered | -0.202 |
Token | Maybe |
Token ID | 6674 |
Feature activation | -6.36e+01 |
Pos logit contributions | |
igsaw | +0.296 |
ey | +0.287 |
iqu | +0.283 |
rou | +0.271 |
nam | +0.269 |
Neg logit contributions | |
and | -0.205 |
, | -0.183 |
. | -0.151 |
said | -0.138 |
came | -0.130 |
Token | you |
Token ID | 345 |
Feature activation | -8.81e+01 |
Pos logit contributions | |
nam | +0.258 |
igsaw | +0.244 |
redd | +0.234 |
saw | +0.232 |
ey | +0.232 |
Neg logit contributions | |
and | -0.251 |
, | -0.229 |
came | -0.203 |
got | -0.196 |
said | -0.195 |
Token | can |
Token ID | 460 |
Feature activation | -7.99e+01 |
Pos logit contributions | |
ifying | +0.194 |
ancers | +0.130 |
saw | +0.126 |
nam | +0.124 |
Amen | +0.119 |
Neg logit contributions | |
replied | -0.406 |
answered | -0.392 |
red | -0.388 |
shook | -0.385 |
and | -0.381 |
Token | It |
Token ID | 632 |
Feature activation | -7.01e+01 |
Pos logit contributions | |
igsaw | +0.228 |
pill | +0.207 |
ey | +0.198 |
nam | +0.197 |
Wed | +0.193 |
Neg logit contributions | |
and | -0.264 |
, | -0.253 |
came | -0.220 |
got | -0.216 |
. | -0.208 |
Token | turned |
Token ID | 2900 |
Feature activation | -3.65e+01 |
Pos logit contributions | |
Amen | +0.333 |
spec | +0.319 |
pill | +0.317 |
redd | +0.317 |
Looks | +0.317 |
Neg logit contributions | |
, | -0.166 |
and | -0.165 |
. | -0.106 |
owed | -0.100 |
! | -0.098 |
Token | out |
Token ID | 503 |
Feature activation | -8.11e+01 |
Pos logit contributions | |
ifying | +0.128 |
rier | +0.124 |
ju | +0.120 |
redd | +0.118 |
igsaw | +0.117 |
Neg logit contributions | |
agree | -0.135 |
owed | -0.131 |
anas | -0.129 |
counted | -0.127 |
read | -0.126 |
Token | that |
Token ID | 326 |
Feature activation | -6.38e+01 |
Pos logit contributions | |
igsaw | +0.345 |
rier | +0.339 |
outer | +0.338 |
ifying | +0.328 |
rou | +0.328 |
Neg logit contributions | |
ops | -0.213 |
anas | -0.202 |
talked | -0.190 |
hed | -0.190 |
owed | -0.186 |
Token | the |
Token ID | 262 |
Feature activation | -7.35e+01 |
Pos logit contributions | |
nam | +0.223 |
redd | +0.201 |
ju | +0.201 |
outer | +0.197 |
possible | +0.196 |
Neg logit contributions | |
and | -0.249 |
owed | -0.228 |
, | -0.218 |
red | -0.216 |
anas | -0.215 |
Token | av |
Token ID | 1196 |
Feature activation | -2.05e+02 |
Pos logit contributions | |
igsaw | +0.263 |
ey | +0.255 |
nam | +0.249 |
redd | +0.243 |
saw | +0.242 |
Neg logit contributions | |
and | -0.333 |
, | -0.319 |
came | -0.275 |
said | -0.271 |
. | -0.267 |
Token | oc |
Token ID | 420 |
Feature activation | -1.82e+02 |
Pos logit contributions | |
igsaw | +1.034 |
ey | +0.986 |
nam | +0.984 |
saw | +0.940 |
iqu | +0.938 |
Neg logit contributions | |
and | -0.779 |
, | -0.717 |
. | -0.559 |
said | -0.554 |
came | -0.553 |
Token | ados |
Token ID | 22484 |
Feature activation | -8.30e+01 |
Pos logit contributions | |
igsaw | +0.588 |
ey | +0.545 |
nam | +0.505 |
pill | +0.491 |
saw | +0.489 |
Neg logit contributions | |
and | -0.969 |
, | -0.914 |
said | -0.811 |
came | -0.805 |
. | -0.804 |
Token | were |
Token ID | 547 |
Feature activation | -5.99e+01 |
Pos logit contributions | |
pill | +0.229 |
ju | +0.225 |
igator | +0.221 |
glers | +0.220 |
ey | +0.218 |
Neg logit contributions | |
anas | -0.234 |
red | -0.231 |
irus | -0.211 |
aron | -0.205 |
and | -0.200 |
Token | bad |
Token ID | 2089 |
Feature activation | -7.07e+01 |
Pos logit contributions | |
nam | +0.171 |
rou | +0.152 |
ju | +0.151 |
redd | +0.147 |
possible | +0.145 |
Neg logit contributions | |
owed | -0.253 |
and | -0.245 |
woke | -0.243 |
, | -0.237 |
replied | -0.233 |
Token | and |
Token ID | 290 |
Feature activation | -3.33e+01 |
Pos logit contributions | |
pill | +0.319 |
spec | +0.318 |
igsaw | +0.312 |
umm | +0.311 |
rou | +0.303 |
Neg logit contributions | |
touched | -0.137 |
answered | -0.137 |
whispered | -0.136 |
held | -0.136 |
reached | -0.134 |
Token | still |
Token ID | 991 |
Feature activation | -7.55e+01 |
Pos logit contributions | |
nam | +0.191 |
igsaw | +0.165 |
ey | +0.163 |
redd | +0.155 |
microsc | +0.152 |
Neg logit contributions | |
and | -0.193 |
, | -0.168 |
. | -0.159 |
came | -0.146 |
said | -0.145 |
Token | couldn |
Token ID | 3521 |
Feature activation | -5.31e+01 |
Pos logit contributions | |
nam | +0.311 |
rou | +0.288 |
possible | +0.277 |
ey | +0.268 |
iqu | +0.261 |
Neg logit contributions | |
and | -0.235 |
hed | -0.203 |
red | -0.199 |
, | -0.193 |
woke | -0.192 |
Token | 't |
Token ID | 470 |
Feature activation | -8.81e+01 |
Pos logit contributions | |
igsaw | +0.189 |
ey | +0.178 |
nam | +0.170 |
saw | +0.162 |
redd | +0.161 |
Neg logit contributions | |
and | -0.269 |
, | -0.254 |
. | -0.220 |
said | -0.217 |
came | -0.217 |
Token | find |
Token ID | 1064 |
Feature activation | -5.87e+01 |
Pos logit contributions | |
igsaw | +0.339 |
ey | +0.328 |
saw | +0.321 |
nam | +0.318 |
outer | +0.313 |
Neg logit contributions | |
and | -0.330 |
, | -0.299 |
said | -0.281 |
came | -0.270 |
gave | -0.269 |
Token | any |
Token ID | 597 |
Feature activation | -8.24e+01 |
Pos logit contributions | |
igsaw | +0.246 |
nam | +0.238 |
ey | +0.233 |
redd | +0.221 |
outer | +0.220 |
Neg logit contributions | |
and | -0.237 |
, | -0.212 |
said | -0.189 |
came | -0.181 |
gave | -0.175 |
Token | av |
Token ID | 1196 |
Feature activation | -2.05e+02 |
Pos logit contributions | |
ey | +0.299 |
igsaw | +0.290 |
saw | +0.286 |
redd | +0.283 |
spec | +0.281 |
Neg logit contributions | |
and | -0.329 |
, | -0.303 |
said | -0.291 |
came | -0.287 |
woke | -0.282 |
Token | oc |
Token ID | 420 |
Feature activation | -1.82e+02 |
Pos logit contributions | |
igsaw | +1.056 |
ey | +1.020 |
nam | +0.992 |
iqu | +0.957 |
saw | +0.953 |
Neg logit contributions | |
and | -0.777 |
, | -0.715 |
. | -0.565 |
said | -0.524 |
came | -0.523 |
Token | ados |
Token ID | 22484 |
Feature activation | -8.30e+01 |
Pos logit contributions | |
igsaw | +0.545 |
ey | +0.489 |
pill | +0.465 |
nam | +0.458 |
umm | +0.454 |
Neg logit contributions | |
and | -1.012 |
, | -0.951 |
said | -0.866 |
came | -0.858 |
. | -0.844 |
Token | . |
Token ID | 13 |
Feature activation | -4.52e+01 |
Pos logit contributions | |
nam | +0.391 |
possible | +0.387 |
reating | +0.357 |
ju | +0.351 |
pill | +0.345 |
Neg logit contributions | |
woke | -0.172 |
nodded | -0.155 |
squeezed | -0.154 |
checked | -0.153 |
replied | -0.152 |
Token | Finally |
Token ID | 9461 |
Feature activation | -3.74e+01 |
Pos logit contributions | |
igsaw | +0.187 |
pill | +0.168 |
ey | +0.168 |
nam | +0.166 |
saw | +0.161 |
Neg logit contributions | |
and | -0.191 |
, | -0.179 |
came | -0.154 |
said | -0.150 |
. | -0.149 |
Token | , |
Token ID | 11 |
Feature activation | -4.10e+01 |
Pos logit contributions | |
igsaw | +0.192 |
Wed | +0.177 |
ocol | +0.176 |
nam | +0.170 |
avail | +0.167 |
Neg logit contributions | |
and | -0.072 |
owed | -0.058 |
anas | -0.056 |
red | -0.053 |
watered | -0.051 |
Token | oll |
Token ID | 692 |
Feature activation | -1.68e+02 |
Pos logit contributions | |
igsaw | +0.173 |
ey | +0.154 |
pill | +0.139 |
saw | +0.132 |
rou | +0.126 |
Neg logit contributions | |
and | -0.511 |
, | -0.486 |
said | -0.443 |
came | -0.437 |
. | -0.433 |
Token | ip |
Token ID | 541 |
Feature activation | -1.19e+02 |
Pos logit contributions | |
ocol | +0.435 |
rou | +0.413 |
nam | +0.394 |
igsaw | +0.382 |
pill | +0.376 |
Neg logit contributions | |
and | -0.817 |
came | -0.710 |
went | -0.707 |
, | -0.707 |
woke | -0.706 |
Token | ops |
Token ID | 2840 |
Feature activation | -7.60e+01 |
Pos logit contributions | |
pill | +0.412 |
rou | +0.331 |
igsaw | +0.286 |
ju | +0.268 |
concession | +0.268 |
Neg logit contributions | |
answered | -0.459 |
entered | -0.445 |
searched | -0.441 |
checked | -0.432 |
came | -0.425 |
Token | , |
Token ID | 11 |
Feature activation | -5.12e+01 |
Pos logit contributions | |
igsaw | +0.377 |
rou | +0.359 |
saw | +0.353 |
pill | +0.353 |
reating | +0.348 |
Neg logit contributions | |
answered | -0.138 |
promised | -0.133 |
said | -0.128 |
checked | -0.125 |
listened | -0.123 |
Token | ch |
Token ID | 442 |
Feature activation | -8.53e+01 |
Pos logit contributions | |
nam | +0.219 |
ey | +0.211 |
redd | +0.210 |
pill | +0.207 |
ju | +0.203 |
Neg logit contributions | |
, | -0.155 |
and | -0.149 |
owed | -0.147 |
woke | -0.145 |
came | -0.145 |
Token | ocol |
Token ID | 4668 |
Feature activation | -2.04e+02 |
Pos logit contributions | |
igsaw | +0.112 |
ey | +0.105 |
ocol | +0.104 |
pill | +0.086 |
rou | +0.075 |
Neg logit contributions | |
and | -0.608 |
, | -0.580 |
said | -0.557 |
came | -0.545 |
went | -0.538 |
Token | ates |
Token ID | 689 |
Feature activation | -6.01e+01 |
Pos logit contributions | |
igsaw | +0.636 |
ey | +0.613 |
pill | +0.562 |
nam | +0.559 |
ocol | +0.554 |
Neg logit contributions | |
and | -1.080 |
, | -1.026 |
said | -0.919 |
came | -0.897 |
. | -0.889 |
Token | , |
Token ID | 11 |
Feature activation | -4.51e+01 |
Pos logit contributions | |
ifying | +0.306 |
rou | +0.301 |
rier | +0.291 |
pill | +0.291 |
igsaw | +0.279 |
Neg logit contributions | |
checked | -0.088 |
promised | -0.081 |
woke | -0.077 |
listened | -0.076 |
entered | -0.073 |
Token | g |
Token ID | 308 |
Feature activation | -8.37e+01 |
Pos logit contributions | |
ey | +0.246 |
redd | +0.246 |
ju | +0.244 |
pill | +0.241 |
nam | +0.238 |
Neg logit contributions | |
owed | -0.067 |
answered | -0.059 |
, | -0.058 |
came | -0.057 |
listened | -0.054 |
Token | ummies |
Token ID | 39578 |
Feature activation | -8.27e+01 |
Pos logit contributions | |
igsaw | +0.256 |
ey | +0.228 |
ocol | +0.216 |
pill | +0.210 |
umm | +0.206 |
Neg logit contributions | |
and | -0.465 |
, | -0.435 |
said | -0.407 |
came | -0.401 |
gave | -0.394 |
Token | , |
Token ID | 11 |
Feature activation | -4.10e+01 |
Pos logit contributions | |
pill | +0.483 |
igsaw | +0.478 |
rou | +0.443 |
ey | +0.426 |
ifying | +0.423 |
Neg logit contributions | |
promised | -0.103 |
listened | -0.092 |
searched | -0.087 |
checked | -0.082 |
owed | -0.079 |
Token | , |
Token ID | 11 |
Feature activation | -5.29e+01 |
Pos logit contributions | |
rou | +0.369 |
igsaw | +0.363 |
nam | +0.352 |
spe | +0.335 |
pill | +0.334 |
Neg logit contributions | |
came | -0.119 |
woke | -0.117 |
checked | -0.107 |
squeezed | -0.102 |
shook | -0.100 |
Token | and |
Token ID | 290 |
Feature activation | -4.36e+01 |
Pos logit contributions | |
nam | +0.257 |
redd | +0.254 |
ju | +0.253 |
ey | +0.248 |
igsaw | +0.245 |
Neg logit contributions | |
owed | -0.114 |
, | -0.114 |
woke | -0.104 |
came | -0.100 |
red | -0.100 |
Token | all |
Token ID | 477 |
Feature activation | -7.16e+01 |
Pos logit contributions | |
nam | +0.199 |
redd | +0.184 |
igsaw | +0.183 |
ey | +0.180 |
saw | +0.177 |
Neg logit contributions | |
and | -0.143 |
, | -0.135 |
. | -0.114 |
came | -0.106 |
! | -0.102 |
Token | the |
Token ID | 262 |
Feature activation | -7.19e+01 |
Pos logit contributions | |
igsaw | +0.291 |
ey | +0.272 |
saw | +0.263 |
Wed | +0.262 |
nam | +0.261 |
Neg logit contributions | |
and | -0.261 |
, | -0.236 |
came | -0.188 |
said | -0.186 |
shook | -0.185 |
Token | other |
Token ID | 584 |
Feature activation | -9.45e+01 |
Pos logit contributions | |
igsaw | +0.257 |
ey | +0.254 |
nam | +0.246 |
saw | +0.233 |
pill | +0.233 |
Neg logit contributions | |
and | -0.334 |
, | -0.322 |
came | -0.274 |
. | -0.270 |
said | -0.268 |
Token | av |
Token ID | 1196 |
Feature activation | -2.02e+02 |
Pos logit contributions | |
pill | +0.273 |
igsaw | +0.258 |
ey | +0.250 |
umm | +0.238 |
ocol | +0.227 |
Neg logit contributions | |
and | -0.431 |
owed | -0.416 |
, | -0.415 |
whispered | -0.402 |
fell | -0.401 |
Token | oc |
Token ID | 420 |
Feature activation | -1.79e+02 |
Pos logit contributions | |
igsaw | +0.997 |
ey | +0.956 |
nam | +0.927 |
saw | +0.901 |
iqu | +0.885 |
Neg logit contributions | |
and | -0.784 |
, | -0.724 |
said | -0.581 |
. | -0.579 |
came | -0.569 |
Token | ados |
Token ID | 22484 |
Feature activation | -8.03e+01 |
Pos logit contributions | |
igsaw | +0.519 |
ey | +0.481 |
umm | +0.446 |
pill | +0.443 |
nam | +0.429 |
Neg logit contributions | |
and | -1.009 |
, | -0.946 |
said | -0.865 |
came | -0.857 |
. | -0.845 |
Token | were |
Token ID | 547 |
Feature activation | -5.71e+01 |
Pos logit contributions | |
nam | +0.319 |
pill | +0.316 |
gol | +0.299 |
glers | +0.297 |
possible | +0.292 |
Neg logit contributions | |
woke | -0.168 |
and | -0.166 |
, | -0.157 |
irus | -0.157 |
int | -0.139 |
Token | very |
Token ID | 845 |
Feature activation | -6.55e+01 |
Pos logit contributions | |
redd | +0.188 |
possible | +0.182 |
nam | +0.181 |
ju | +0.181 |
rou | +0.167 |
Neg logit contributions | |
woke | -0.237 |
owed | -0.223 |
and | -0.215 |
fell | -0.205 |
rang | -0.200 |
Token | happy |
Token ID | 3772 |
Feature activation | -6.07e+01 |
Pos logit contributions | |
igsaw | +0.215 |
nam | +0.196 |
ey | +0.194 |
redd | +0.190 |
saw | +0.186 |
Neg logit contributions | |
and | -0.342 |
, | -0.320 |
came | -0.283 |
. | -0.276 |
said | -0.275 |
Token | time |
Token ID | 640 |
Feature activation | -8.45e+01 |
Pos logit contributions | |
igsaw | +0.361 |
ey | +0.319 |
outer | +0.274 |
gol | +0.268 |
Wed | +0.267 |
Neg logit contributions | |
and | -0.398 |
said | -0.378 |
, | -0.366 |
came | -0.358 |
gave | -0.358 |
Token | playing |
Token ID | 2712 |
Feature activation | -7.37e+01 |
Pos logit contributions | |
rou | +0.366 |
igsaw | +0.349 |
outer | +0.335 |
ocol | +0.328 |
pill | +0.323 |
Neg logit contributions | |
woke | -0.248 |
said | -0.222 |
got | -0.216 |
lied | -0.215 |
shook | -0.215 |
Token | with |
Token ID | 351 |
Feature activation | -6.46e+01 |
Pos logit contributions | |
igsaw | +0.349 |
ey | +0.311 |
outer | +0.311 |
Wed | +0.305 |
nam | +0.304 |
Neg logit contributions | |
checked | -0.198 |
fell | -0.193 |
rang | -0.190 |
answered | -0.190 |
got | -0.188 |
Token | her |
Token ID | 607 |
Feature activation | -9.30e+01 |
Pos logit contributions | |
igsaw | +0.277 |
nam | +0.261 |
ey | +0.257 |
pill | +0.246 |
microsc | +0.240 |
Neg logit contributions | |
and | -0.247 |
, | -0.233 |
said | -0.200 |
got | -0.198 |
came | -0.198 |
Token | or |
Token ID | 393 |
Feature activation | -6.39e+01 |
Pos logit contributions | |
igsaw | +0.325 |
nam | +0.304 |
ey | +0.291 |
microsc | +0.269 |
rou | +0.260 |
Neg logit contributions | |
and | -0.408 |
came | -0.400 |
, | -0.389 |
woke | -0.387 |
got | -0.383 |
Token | nam |
Token ID | 7402 |
Feature activation | -2.02e+02 |
Pos logit contributions | |
nam | +0.276 |
igsaw | +0.257 |
ey | +0.249 |
redd | +0.233 |
microsc | +0.230 |
Neg logit contributions | |
and | -0.262 |
, | -0.242 |
. | -0.205 |
came | -0.203 |
said | -0.203 |
Token | ents |
Token ID | 658 |
Feature activation | -1.08e+02 |
Pos logit contributions | |
ey | +1.026 |
igsaw | +1.007 |
Deal | +1.006 |
narrative | +0.994 |
downward | +0.984 |
Neg logit contributions | |
and | -0.774 |
, | -0.656 |
. | -0.571 |
! | -0.505 |
said | -0.498 |
Token | and |
Token ID | 290 |
Feature activation | -4.36e+01 |
Pos logit contributions | |
nam | +0.485 |
reating | +0.477 |
possible | +0.474 |
rou | +0.470 |
igsaw | +0.462 |
Neg logit contributions | |
checked | -0.203 |
woke | -0.186 |
squeezed | -0.185 |
shook | -0.169 |
turned | -0.165 |
Token | making |
Token ID | 1642 |
Feature activation | -5.10e+01 |
Pos logit contributions | |
nam | +0.228 |
redd | +0.199 |
igsaw | +0.199 |
saw | +0.189 |
ey | +0.189 |
Neg logit contributions | |
and | -0.131 |
, | -0.117 |
woke | -0.098 |
. | -0.094 |
came | -0.089 |
Token | her |
Token ID | 607 |
Feature activation | -8.25e+01 |
Pos logit contributions | |
igsaw | +0.205 |
nam | +0.197 |
pill | +0.181 |
redd | +0.181 |
ey | +0.180 |
Neg logit contributions | |
and | -0.212 |
, | -0.202 |
came | -0.174 |
woke | -0.173 |
fell | -0.173 |
Token | special |
Token ID | 2041 |
Feature activation | -7.71e+01 |
Pos logit contributions | |
igsaw | +0.342 |
nam | +0.337 |
ey | +0.318 |
redd | +0.304 |
saw | +0.301 |
Neg logit contributions | |
and | -0.328 |
, | -0.309 |
came | -0.275 |
said | -0.258 |
woke | -0.255 |
Token | giving |
Token ID | 3501 |
Feature activation | -3.99e+01 |
Pos logit contributions | |
igsaw | +0.258 |
Wed | +0.251 |
ric | +0.251 |
ey | +0.250 |
ocol | +0.248 |
Neg logit contributions | |
and | -0.357 |
, | -0.325 |
came | -0.321 |
agreed | -0.311 |
went | -0.309 |
Token | him |
Token ID | 683 |
Feature activation | -7.88e+01 |
Pos logit contributions | |
igsaw | +0.157 |
ey | +0.148 |
nam | +0.148 |
redd | +0.146 |
pill | +0.140 |
Neg logit contributions | |
and | -0.155 |
, | -0.144 |
came | -0.129 |
fell | -0.124 |
followed | -0.122 |
Token | too |
Token ID | 1165 |
Feature activation | -7.98e+01 |
Pos logit contributions | |
igsaw | +0.283 |
redd | +0.269 |
ju | +0.264 |
rou | +0.263 |
nam | +0.260 |
Neg logit contributions | |
and | -0.302 |
came | -0.289 |
answered | -0.275 |
said | -0.273 |
, | -0.273 |
Token | many |
Token ID | 867 |
Feature activation | -7.43e+01 |
Pos logit contributions | |
igsaw | +0.195 |
rou | +0.177 |
redd | +0.168 |
nam | +0.162 |
arl | +0.151 |
Neg logit contributions | |
and | -0.411 |
came | -0.403 |
woke | -0.372 |
checked | -0.371 |
searched | -0.369 |
Token | ch |
Token ID | 442 |
Feature activation | -8.22e+01 |
Pos logit contributions | |
ocol | +0.305 |
ey | +0.288 |
pill | +0.286 |
reating | +0.280 |
microsc | +0.279 |
Neg logit contributions | |
came | -0.221 |
got | -0.220 |
and | -0.220 |
fell | -0.217 |
looked | -0.214 |
Token | ocol |
Token ID | 4668 |
Feature activation | -2.01e+02 |
Pos logit contributions | |
igsaw | +0.115 |
ey | +0.104 |
ocol | +0.101 |
ubb | +0.075 |
rou | +0.073 |
Neg logit contributions | |
and | -0.589 |
, | -0.562 |
said | -0.537 |
came | -0.526 |
got | -0.518 |
Token | ates |
Token ID | 689 |
Feature activation | -5.74e+01 |
Pos logit contributions | |
igsaw | +0.655 |
ey | +0.622 |
nam | +0.584 |
pill | +0.563 |
ocol | +0.556 |
Neg logit contributions | |
and | -1.049 |
, | -0.993 |
said | -0.881 |
came | -0.870 |
. | -0.855 |
Token | ." |
Token ID | 526 |
Feature activation | -3.94e+01 |
Pos logit contributions | |
ifying | +0.281 |
igsaw | +0.268 |
possible | +0.266 |
rier | +0.265 |
nam | +0.260 |
Neg logit contributions | |
checked | -0.144 |
came | -0.138 |
shook | -0.130 |
woke | -0.129 |
answered | -0.128 |
Token | The |
Token ID | 383 |
Feature activation | -4.98e+01 |
Pos logit contributions | |
igsaw | +0.129 |
nam | +0.125 |
Amen | +0.124 |
Wed | +0.118 |
pill | +0.118 |
Neg logit contributions | |
and | -0.161 |
, | -0.150 |
came | -0.145 |
got | -0.143 |
woke | -0.137 |
Token | king |
Token ID | 5822 |
Feature activation | -5.26e+01 |
Pos logit contributions | |
igsaw | +0.186 |
ey | +0.175 |
nam | +0.169 |
saw | +0.163 |
redd | +0.159 |
Neg logit contributions | |
and | -0.237 |
, | -0.223 |
came | -0.190 |
. | -0.190 |
said | -0.189 |
Token | learned |
Token ID | 4499 |
Feature activation | -3.04e+01 |
Pos logit contributions | |
rou | +0.261 |
spe | +0.238 |
ey | +0.236 |
saw | +0.235 |
Amen | +0.234 |
Neg logit contributions | |
, | -0.090 |
and | -0.077 |
because | -0.072 |
- | -0.066 |
. | -0.063 |
Token | as |
Token ID | 355 |
Feature activation | -6.34e+01 |
Pos logit contributions | |
rier | +0.455 |
igsaw | +0.454 |
Wed | +0.411 |
reating | +0.409 |
possible | +0.404 |
Neg logit contributions | |
checked | -0.196 |
touched | -0.194 |
came | -0.182 |
squeezed | -0.175 |
reached | -0.170 |
Token | he |
Token ID | 339 |
Feature activation | -7.11e+01 |
Pos logit contributions | |
igsaw | +0.255 |
nam | +0.244 |
outer | +0.235 |
saw | +0.230 |
possible | +0.226 |
Neg logit contributions | |
and | -0.249 |
, | -0.227 |
said | -0.201 |
woke | -0.200 |
came | -0.200 |
Token | opened |
Token ID | 4721 |
Feature activation | -3.60e+01 |
Pos logit contributions | |
pill | +0.272 |
rou | +0.267 |
concession | +0.256 |
iam | +0.244 |
iqu | +0.242 |
Neg logit contributions | |
, | -0.199 |
and | -0.173 |
because | -0.147 |
; | -0.136 |
but | -0.136 |
Token | the |
Token ID | 262 |
Feature activation | -7.19e+01 |
Pos logit contributions | |
igsaw | +0.147 |
outer | +0.139 |
nam | +0.129 |
ifying | +0.129 |
ey | +0.128 |
Neg logit contributions | |
and | -0.128 |
woke | -0.125 |
came | -0.124 |
listened | -0.123 |
got | -0.122 |
Token | ch |
Token ID | 442 |
Feature activation | -8.22e+01 |
Pos logit contributions | |
igsaw | +0.267 |
ey | +0.252 |
nam | +0.232 |
saw | +0.224 |
redd | +0.222 |
Neg logit contributions | |
and | -0.347 |
, | -0.328 |
came | -0.290 |
said | -0.287 |
. | -0.277 |
Token | ocol |
Token ID | 4668 |
Feature activation | -2.01e+02 |
Pos logit contributions | |
igsaw | +0.138 |
ey | +0.120 |
ocol | +0.099 |
pill | +0.083 |
saw | +0.080 |
Neg logit contributions | |
and | -0.577 |
, | -0.552 |
said | -0.513 |
came | -0.504 |
. | -0.499 |
Token | ates |
Token ID | 689 |
Feature activation | -5.74e+01 |
Pos logit contributions | |
igsaw | +0.687 |
ey | +0.654 |
nam | +0.605 |
pill | +0.589 |
saw | +0.578 |
Neg logit contributions | |
and | -1.029 |
, | -0.975 |
said | -0.853 |
came | -0.842 |
. | -0.839 |
Token | and |
Token ID | 290 |
Feature activation | -3.47e+01 |
Pos logit contributions | |
ifying | +0.310 |
rier | +0.306 |
igsaw | +0.300 |
nam | +0.284 |
reating | +0.282 |
Neg logit contributions | |
checked | -0.107 |
woke | -0.101 |
came | -0.101 |
listened | -0.094 |
helped | -0.087 |
Token | started |
Token ID | 2067 |
Feature activation | -4.31e+01 |
Pos logit contributions | |
nam | +0.176 |
igsaw | +0.158 |
ey | +0.158 |
saw | +0.157 |
redd | +0.154 |
Neg logit contributions | |
and | -0.104 |
, | -0.096 |
. | -0.077 |
came | -0.067 |
owed | -0.064 |
Token | eating |
Token ID | 6600 |
Feature activation | -5.02e+01 |
Pos logit contributions | |
Wed | +0.163 |
igsaw | +0.161 |
nam | +0.158 |
rou | +0.156 |
outer | +0.151 |
Neg logit contributions | |
and | -0.154 |
, | -0.144 |
said | -0.130 |
woke | -0.130 |
owed | -0.129 |
Token | them |
Token ID | 606 |
Feature activation | -7.62e+01 |
Pos logit contributions | |
nam | +0.188 |
igsaw | +0.183 |
rier | +0.181 |
ju | +0.179 |
redd | +0.178 |
Neg logit contributions | |
woke | -0.183 |
owed | -0.176 |
came | -0.176 |
got | -0.174 |
fell | -0.174 |
Token | 't |
Token ID | 470 |
Feature activation | -1.29e+02 |
Pos logit contributions | |
and | +0.175 |
searched | +0.138 |
whispered | +0.138 |
hed | +0.131 |
checked | +0.129 |
Neg logit contributions | |
nam | -0.411 |
igsaw | -0.396 |
redd | -0.388 |
ju | -0.378 |
ocol | -0.375 |
Token | know |
Token ID | 760 |
Feature activation | -9.10e+01 |
Pos logit contributions | |
possible | +0.508 |
nam | +0.490 |
saw | +0.454 |
gol | +0.447 |
igsaw | +0.443 |
Neg logit contributions | |
and | -0.442 |
, | -0.371 |
woke | -0.354 |
shook | -0.353 |
looked | -0.351 |
Token | the |
Token ID | 262 |
Feature activation | -9.86e+01 |
Pos logit contributions | |
nam | +0.323 |
microsc | +0.299 |
outer | +0.297 |
reating | +0.296 |
Wed | +0.281 |
Neg logit contributions | |
hed | -0.333 |
and | -0.324 |
squeezed | -0.317 |
got | -0.314 |
fell | -0.309 |
Token | rh |
Token ID | 9529 |
Feature activation | -1.30e+02 |
Pos logit contributions | |
igsaw | +0.340 |
ey | +0.332 |
nam | +0.314 |
redd | +0.307 |
saw | +0.288 |
Neg logit contributions | |
and | -0.474 |
, | -0.453 |
came | -0.399 |
said | -0.391 |
got | -0.385 |
Token | in |
Token ID | 259 |
Feature activation | -1.42e+02 |
Pos logit contributions | |
igsaw | +0.403 |
ey | +0.373 |
nam | +0.353 |
saw | +0.337 |
pill | +0.331 |
Neg logit contributions | |
and | -0.718 |
, | -0.681 |
. | -0.597 |
said | -0.594 |
came | -0.591 |
Token | oc |
Token ID | 420 |
Feature activation | -2.01e+02 |
Pos logit contributions | |
igsaw | +0.276 |
pill | +0.271 |
Amen | +0.253 |
ju | +0.250 |
spe | +0.247 |
Neg logit contributions | |
and | -0.707 |
, | -0.674 |
ran | -0.640 |
shook | -0.628 |
squeezed | -0.626 |
Token | eros |
Token ID | 27498 |
Feature activation | -9.11e+01 |
Pos logit contributions | |
igsaw | +0.501 |
ey | +0.450 |
umm | +0.438 |
rier | +0.432 |
ocol | +0.415 |
Neg logit contributions | |
and | -1.184 |
, | -1.120 |
said | -1.041 |
came | -1.038 |
got | -1.006 |
Token | was |
Token ID | 373 |
Feature activation | -7.39e+01 |
Pos logit contributions | |
spe | +0.328 |
pill | +0.315 |
saw | +0.313 |
wre | +0.312 |
possible | +0.307 |
Neg logit contributions | |
and | -0.221 |
squeezed | -0.199 |
rubbed | -0.184 |
ged | -0.184 |
shook | -0.183 |
Token | wild |
Token ID | 4295 |
Feature activation | -9.48e+01 |
Pos logit contributions | |
nam | +0.245 |
possible | +0.244 |
rou | +0.242 |
igsaw | +0.234 |
outer | +0.233 |
Neg logit contributions | |
and | -0.290 |
, | -0.271 |
owed | -0.259 |
woke | -0.249 |
said | -0.243 |
Token | . |
Token ID | 13 |
Feature activation | -5.78e+01 |
Pos logit contributions | |
igsaw | +0.540 |
spec | +0.537 |
pill | +0.528 |
rou | +0.511 |
umm | +0.510 |
Neg logit contributions | |
said | -0.117 |
held | -0.115 |
got | -0.114 |
squeezed | -0.113 |
fell | -0.112 |
Token | Can |
Token ID | 1680 |
Feature activation | -7.91e+01 |
Pos logit contributions | |
igsaw | +0.212 |
pill | +0.189 |
ey | +0.189 |
nam | +0.188 |
saw | +0.182 |
Neg logit contributions | |
and | -0.259 |
, | -0.243 |
came | -0.220 |
got | -0.219 |
fell | -0.210 |
Token | was |
Token ID | 373 |
Feature activation | -6.46e+01 |
Pos logit contributions | |
redd | +0.286 |
spec | +0.276 |
ju | +0.275 |
Isa | +0.271 |
triangular | +0.266 |
Neg logit contributions | |
and | -0.177 |
, | -0.173 |
! | -0.134 |
; | -0.134 |
agree | -0.132 |
Token | a |
Token ID | 257 |
Feature activation | -8.00e+01 |
Pos logit contributions | |
nam | +0.257 |
ey | +0.255 |
igsaw | +0.252 |
redd | +0.236 |
outer | +0.232 |
Neg logit contributions | |
and | -0.259 |
, | -0.243 |
came | -0.211 |
said | -0.206 |
woke | -0.205 |
Token | box |
Token ID | 3091 |
Feature activation | -5.85e+01 |
Pos logit contributions | |
igsaw | +0.291 |
ey | +0.274 |
nam | +0.261 |
redd | +0.247 |
saw | +0.245 |
Neg logit contributions | |
and | -0.386 |
, | -0.361 |
said | -0.314 |
. | -0.314 |
came | -0.313 |
Token | of |
Token ID | 286 |
Feature activation | -9.40e+01 |
Pos logit contributions | |
igsaw | +0.332 |
saw | +0.305 |
ey | +0.302 |
ifying | +0.301 |
rou | +0.299 |
Neg logit contributions | |
and | -0.112 |
, | -0.095 |
said | -0.089 |
came | -0.088 |
listened | -0.087 |
Token | ch |
Token ID | 442 |
Feature activation | -8.08e+01 |
Pos logit contributions | |
ey | +0.324 |
igsaw | +0.322 |
nam | +0.321 |
redd | +0.290 |
microsc | +0.279 |
Neg logit contributions | |
and | -0.447 |
, | -0.426 |
came | -0.376 |
. | -0.366 |
said | -0.365 |
Token | ocol |
Token ID | 4668 |
Feature activation | -2.00e+02 |
Pos logit contributions | |
igsaw | +0.109 |
ey | +0.091 |
nam | +0.073 |
saw | +0.064 |
pill | +0.064 |
Neg logit contributions | |
and | -0.590 |
, | -0.568 |
said | -0.519 |
. | -0.515 |
came | -0.515 |
Token | ates |
Token ID | 689 |
Feature activation | -5.60e+01 |
Pos logit contributions | |
igsaw | +0.721 |
ey | +0.681 |
nam | +0.644 |
saw | +0.616 |
pill | +0.615 |
Neg logit contributions | |
and | -0.990 |
, | -0.935 |
said | -0.808 |
. | -0.806 |
came | -0.800 |
Token | . |
Token ID | 13 |
Feature activation | -4.24e+01 |
Pos logit contributions | |
ifying | +0.303 |
igsaw | +0.282 |
possible | +0.272 |
rier | +0.271 |
rou | +0.268 |
Neg logit contributions | |
checked | -0.104 |
woke | -0.102 |
promised | -0.099 |
came | -0.096 |
listened | -0.094 |
Token | Lily |
Token ID | 20037 |
Feature activation | -7.64e+01 |
Pos logit contributions | |
igsaw | +0.183 |
ey | +0.173 |
nam | +0.166 |
saw | +0.162 |
redd | +0.160 |
Neg logit contributions | |
and | -0.180 |
, | -0.168 |
. | -0.141 |
came | -0.141 |
said | -0.140 |
Token | loved |
Token ID | 6151 |
Feature activation | -3.12e+01 |
Pos logit contributions | |
pill | +0.297 |
spec | +0.279 |
ey | +0.274 |
rou | +0.272 |
lon | +0.270 |
Neg logit contributions | |
eness | -0.184 |
fulness | -0.180 |
, | -0.168 |
. | -0.160 |
; | -0.158 |
Token | ch |
Token ID | 442 |
Feature activation | -7.30e+01 |
Pos logit contributions | |
igsaw | +0.129 |
nam | +0.125 |
ey | +0.125 |
pill | +0.119 |
saw | +0.117 |
Neg logit contributions | |
and | -0.129 |
, | -0.122 |
said | -0.101 |
came | -0.101 |
. | -0.101 |
Token | flashlight |
Token ID | 38371 |
Feature activation | -7.96e+01 |
Pos logit contributions | |
igsaw | +0.293 |
ey | +0.271 |
nam | +0.260 |
redd | +0.250 |
saw | +0.240 |
Neg logit contributions | |
and | -0.408 |
, | -0.380 |
came | -0.342 |
said | -0.339 |
. | -0.338 |
Token | , |
Token ID | 11 |
Feature activation | -5.12e+01 |
Pos logit contributions | |
ifying | +0.371 |
igsaw | +0.361 |
saw | +0.347 |
rier | +0.341 |
outer | +0.340 |
Neg logit contributions | |
promised | -0.139 |
listened | -0.135 |
squeezed | -0.133 |
cried | -0.123 |
guessed | -0.119 |
Token | and |
Token ID | 290 |
Feature activation | -4.19e+01 |
Pos logit contributions | |
nam | +0.249 |
redd | +0.242 |
ey | +0.239 |
outer | +0.235 |
ju | +0.232 |
Neg logit contributions | |
, | -0.102 |
owed | -0.093 |
replied | -0.087 |
answered | -0.085 |
woke | -0.085 |
Token | a |
Token ID | 257 |
Feature activation | -7.85e+01 |
Pos logit contributions | |
nam | +0.187 |
igsaw | +0.164 |
redd | +0.162 |
ey | +0.159 |
saw | +0.158 |
Neg logit contributions | |
and | -0.149 |
, | -0.145 |
. | -0.119 |
came | -0.113 |
! | -0.112 |
Token | magn |
Token ID | 7842 |
Feature activation | -7.71e+01 |
Pos logit contributions | |
igsaw | +0.267 |
ey | +0.255 |
nam | +0.244 |
redd | +0.228 |
saw | +0.222 |
Neg logit contributions | |
and | -0.376 |
, | -0.351 |
came | -0.315 |
said | -0.313 |
. | -0.307 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.00e+02 |
Pos logit contributions | |
igsaw | +0.084 |
ifying | +0.072 |
ey | +0.063 |
saw | +0.050 |
nam | +0.048 |
Neg logit contributions | |
and | -0.553 |
, | -0.540 |
said | -0.503 |
came | -0.490 |
. | -0.484 |
Token | glass |
Token ID | 5405 |
Feature activation | -6.37e+01 |
Pos logit contributions | |
igsaw | +0.770 |
ey | +0.726 |
nam | +0.710 |
saw | +0.682 |
pill | +0.673 |
Neg logit contributions | |
and | -0.950 |
, | -0.897 |
. | -0.766 |
said | -0.752 |
came | -0.750 |
Token | . |
Token ID | 13 |
Feature activation | -4.38e+01 |
Pos logit contributions | |
igsaw | +0.341 |
ey | +0.310 |
ifying | +0.308 |
rier | +0.307 |
pill | +0.296 |
Neg logit contributions | |
listened | -0.130 |
and | -0.116 |
promised | -0.112 |
told | -0.108 |
checked | -0.108 |
Token |
|
Token ID | 198 |
Feature activation | -7.16e+01 |
Pos logit contributions | |
igsaw | +0.174 |
nam | +0.156 |
ey | +0.156 |
pill | +0.155 |
saw | +0.154 |
Neg logit contributions | |
and | -0.193 |
, | -0.183 |
came | -0.157 |
said | -0.154 |
. | -0.153 |
Token |
|
Token ID | 198 |
Feature activation | -7.04e+01 |
Pos logit contributions | |
igsaw | +0.382 |
ey | +0.366 |
nam | +0.356 |
saw | +0.345 |
redd | +0.343 |
Neg logit contributions | |
and | -0.234 |
, | -0.214 |
. | -0.168 |
said | -0.166 |
came | -0.164 |
Token | As |
Token ID | 1722 |
Feature activation | -4.17e+01 |
Pos logit contributions | |
igsaw | +0.345 |
ey | +0.334 |
nam | +0.321 |
redd | +0.311 |
iqu | +0.308 |
Neg logit contributions | |
and | -0.262 |
, | -0.241 |
. | -0.196 |
said | -0.194 |
came | -0.191 |
Token | When |
Token ID | 1649 |
Feature activation | +4.99e+01 |
Pos logit contributions | |
and | +0.206 |
, | +0.192 |
. | +0.160 |
said | +0.160 |
came | +0.158 |
Neg logit contributions | |
igsaw | -0.222 |
ey | -0.211 |
nam | -0.202 |
saw | -0.197 |
redd | -0.195 |
Token | she |
Token ID | 673 |
Feature activation | +3.49e+01 |
Pos logit contributions | |
and | +0.186 |
, | +0.173 |
. | +0.140 |
came | +0.138 |
said | +0.138 |
Neg logit contributions | |
igsaw | -0.247 |
ey | -0.235 |
nam | -0.226 |
saw | -0.220 |
pill | -0.219 |
Token | looked |
Token ID | 3114 |
Feature activation | +4.90e+01 |
Pos logit contributions | |
and | +0.064 |
, | +0.051 |
. | +0.036 |
said | +0.030 |
! | +0.023 |
Neg logit contributions | |
ey | -0.219 |
igsaw | -0.213 |
saw | -0.206 |
nam | -0.201 |
pill | -0.197 |
Token | up |
Token ID | 510 |
Feature activation | +5.10e+00 |
Pos logit contributions | |
and | +0.143 |
, | +0.129 |
. | +0.097 |
said | +0.096 |
came | +0.095 |
Neg logit contributions | |
igsaw | -0.278 |
ey | -0.267 |
nam | -0.260 |
saw | -0.254 |
redd | -0.252 |
Token | , |
Token ID | 11 |
Feature activation | +5.37e+01 |
Pos logit contributions | |
and | +0.004 |
gave | +0.004 |
said | +0.004 |
owed | +0.003 |
agreed | +0.003 |
Neg logit contributions | |
ey | -0.032 |
igsaw | -0.031 |
nam | -0.031 |
iring | -0.029 |
outer | -0.029 |
Token | she |
Token ID | 673 |
Feature activation | +3.69e+01 |
Pos logit contributions | |
and | +0.197 |
, | +0.183 |
. | +0.146 |
came | +0.144 |
said | +0.144 |
Neg logit contributions | |
igsaw | -0.275 |
ey | -0.259 |
nam | -0.245 |
pill | -0.243 |
saw | -0.243 |
Token | saw |
Token ID | 2497 |
Feature activation | +5.44e+01 |
Pos logit contributions | |
and | +0.098 |
, | +0.087 |
. | +0.073 |
red | +0.065 |
! | +0.059 |
Neg logit contributions | |
ey | -0.183 |
nam | -0.178 |
rou | -0.174 |
saw | -0.172 |
iqu | -0.171 |
Token | an |
Token ID | 281 |
Feature activation | +2.19e+01 |
Pos logit contributions | |
and | +0.165 |
, | +0.151 |
. | +0.113 |
said | +0.113 |
came | +0.108 |
Neg logit contributions | |
igsaw | -0.339 |
ey | -0.308 |
Deal | -0.305 |
umm | -0.303 |
rier | -0.297 |
Token | unusual |
Token ID | 8468 |
Feature activation | +1.33e+01 |
Pos logit contributions | |
and | +0.117 |
, | +0.111 |
. | +0.097 |
came | +0.097 |
said | +0.097 |
Neg logit contributions | |
igsaw | -0.070 |
ey | -0.066 |
nam | -0.062 |
saw | -0.059 |
redd | -0.058 |
Token | rainbow |
Token ID | 27223 |
Feature activation | +4.17e+01 |
Pos logit contributions | |
and | +0.060 |
, | +0.055 |
agreed | +0.055 |
said | +0.055 |
helped | +0.053 |
Neg logit contributions | |
ey | -0.046 |
igsaw | -0.045 |
spec | -0.037 |
saw | -0.037 |
gol | -0.037 |
Token | above |
Token ID | 2029 |
Feature activation | +4.93e+01 |
Pos logit contributions | |
and | +0.079 |
, | +0.068 |
said | +0.059 |
promised | +0.058 |
checked | +0.058 |
Neg logit contributions | |
igsaw | -0.236 |
saw | -0.234 |
ey | -0.233 |
pill | -0.224 |
spec | -0.224 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +4.27e+01 |
Pos logit contributions | |
and | +0.150 |
, | +0.136 |
. | +0.108 |
said | +0.105 |
came | +0.103 |
Neg logit contributions | |
igsaw | -0.246 |
ey | -0.237 |
nam | -0.232 |
saw | -0.225 |
redd | -0.224 |
Token | Once |
Token ID | 7454 |
Feature activation | +3.27e+01 |
Pos logit contributions | |
and | +0.158 |
, | +0.147 |
said | +0.125 |
came | +0.121 |
gave | +0.120 |
Neg logit contributions | |
igsaw | -0.188 |
ey | -0.175 |
saw | -0.172 |
pill | -0.170 |
nam | -0.163 |
Token | there |
Token ID | 612 |
Feature activation | +2.49e+01 |
Pos logit contributions | |
, | +0.165 |
and | +0.162 |
. | +0.138 |
said | +0.128 |
came | +0.124 |
Neg logit contributions | |
ey | -0.139 |
igsaw | -0.139 |
umm | -0.138 |
pill | -0.136 |
redd | -0.132 |
Token | was |
Token ID | 373 |
Feature activation | +3.55e+01 |
Pos logit contributions | |
and | +0.055 |
, | +0.039 |
whispered | +0.034 |
gave | +0.033 |
said | +0.032 |
Neg logit contributions | |
igsaw | -0.127 |
ey | -0.126 |
spec | -0.124 |
nam | -0.122 |
saw | -0.121 |
Token | a |
Token ID | 257 |
Feature activation | +1.89e+01 |
Pos logit contributions | |
and | +0.091 |
, | +0.070 |
. | +0.049 |
went | +0.030 |
said | +0.027 |
Neg logit contributions | |
igsaw | -0.265 |
rier | -0.260 |
ey | -0.260 |
iqu | -0.257 |
umm | -0.252 |
Token | lion |
Token ID | 18744 |
Feature activation | +3.36e+01 |
Pos logit contributions | |
and | +0.079 |
, | +0.073 |
. | +0.061 |
said | +0.056 |
came | +0.055 |
Neg logit contributions | |
igsaw | -0.095 |
ey | -0.093 |
pill | -0.087 |
nam | -0.086 |
iqu | -0.084 |
Token | . |
Token ID | 13 |
Feature activation | +4.90e+01 |
Pos logit contributions | |
shook | +0.089 |
argued | +0.087 |
checked | +0.086 |
agreed | +0.086 |
promised | +0.084 |
Neg logit contributions | |
rou | -0.126 |
Amen | -0.124 |
saw | -0.121 |
pill | -0.121 |
ric | -0.118 |
Token | He |
Token ID | 679 |
Feature activation | +2.67e+01 |
Pos logit contributions | |
and | +0.234 |
, | +0.214 |
said | +0.189 |
came | +0.189 |
got | +0.187 |
Neg logit contributions | |
igsaw | -0.151 |
ey | -0.151 |
saw | -0.143 |
Amen | -0.141 |
redd | -0.140 |
Token | was |
Token ID | 373 |
Feature activation | +3.79e+01 |
Pos logit contributions | |
owed | +0.071 |
and | +0.069 |
argued | +0.068 |
again | +0.068 |
, | +0.068 |
Neg logit contributions | |
pill | -0.103 |
Isa | -0.091 |
spec | -0.090 |
saw | -0.088 |
ey | -0.087 |
Token | very |
Token ID | 845 |
Feature activation | +2.61e+01 |
Pos logit contributions | |
and | +0.126 |
, | +0.118 |
. | +0.087 |
said | +0.085 |
came | +0.083 |
Neg logit contributions | |
igsaw | -0.238 |
ey | -0.220 |
umm | -0.212 |
pill | -0.206 |
saw | -0.205 |
Token | hot |
Token ID | 3024 |
Feature activation | +1.84e+01 |
Pos logit contributions | |
and | +0.107 |
, | +0.106 |
said | +0.088 |
. | +0.086 |
came | +0.082 |
Neg logit contributions | |
igsaw | -0.127 |
ey | -0.121 |
reating | -0.116 |
pill | -0.116 |
umm | -0.112 |
Token | to |
Token ID | 284 |
Feature activation | +2.96e+01 |
Pos logit contributions | |
rou | +0.005 |
saw | +0.005 |
reating | +0.005 |
igsaw | +0.005 |
rier | +0.005 |
Neg logit contributions | |
checked | -0.001 |
looked | -0.001 |
searched | -0.001 |
woke | -0.001 |
got | -0.001 |
Token | you |
Token ID | 345 |
Feature activation | +4.03e+00 |
Pos logit contributions | |
and | +0.137 |
, | +0.127 |
came | +0.109 |
said | +0.109 |
. | +0.106 |
Neg logit contributions | |
igsaw | -0.112 |
nam | -0.105 |
ey | -0.105 |
saw | -0.101 |
pill | -0.097 |
Token | . |
Token ID | 13 |
Feature activation | +4.01e+01 |
Pos logit contributions | |
checked | +0.011 |
examined | +0.011 |
walked | +0.010 |
looked | +0.010 |
searched | +0.010 |
Neg logit contributions | |
rou | -0.018 |
rier | -0.018 |
possible | -0.017 |
ifying | -0.017 |
arl | -0.016 |
Token | Let |
Token ID | 3914 |
Feature activation | +3.80e+01 |
Pos logit contributions | |
and | +0.170 |
, | +0.163 |
. | +0.136 |
said | +0.132 |
gave | +0.126 |
Neg logit contributions | |
igsaw | -0.184 |
ey | -0.171 |
nam | -0.165 |
saw | -0.164 |
reating | -0.164 |
Token | 's |
Token ID | 338 |
Feature activation | +1.24e+01 |
Pos logit contributions | |
and | +0.102 |
, | +0.090 |
. | +0.066 |
came | +0.065 |
said | +0.064 |
Neg logit contributions | |
igsaw | -0.211 |
ey | -0.203 |
saw | -0.198 |
nam | -0.196 |
redd | -0.195 |
Token | share |
Token ID | 2648 |
Feature activation | +4.08e+01 |
Pos logit contributions | |
and | +0.041 |
, | +0.037 |
said | +0.030 |
came | +0.030 |
. | +0.029 |
Neg logit contributions | |
igsaw | -0.062 |
ey | -0.060 |
nam | -0.058 |
saw | -0.057 |
redd | -0.056 |
Token | our |
Token ID | 674 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
owed | +0.172 |
came | +0.170 |
fell | +0.166 |
entered | +0.164 |
spoke | +0.164 |
Neg logit contributions | |
nam | -0.122 |
reating | -0.114 |
ours | -0.106 |
microsc | -0.102 |
rou | -0.102 |
Token | cars |
Token ID | 5006 |
Feature activation | +2.34e+01 |
Pos logit contributions | |
and | +0.005 |
, | +0.005 |
came | +0.004 |
said | +0.004 |
answered | +0.004 |
Neg logit contributions | |
igsaw | -0.003 |
ey | -0.003 |
nam | -0.003 |
saw | -0.003 |
microsc | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | +5.21e+01 |
Pos logit contributions | |
came | +0.030 |
and | +0.026 |
answered | +0.025 |
woke | +0.025 |
, | +0.024 |
Neg logit contributions | |
igsaw | -0.151 |
nam | -0.140 |
reating | -0.137 |
saw | -0.136 |
rou | -0.135 |
Token | play |
Token ID | 711 |
Feature activation | +4.40e+01 |
Pos logit contributions | |
and | +0.189 |
, | +0.174 |
. | +0.141 |
said | +0.139 |
came | +0.139 |
Neg logit contributions | |
igsaw | -0.258 |
ey | -0.246 |
nam | -0.239 |
saw | -0.232 |
redd | -0.230 |
Token | together |
Token ID | 1978 |
Feature activation | +1.96e+01 |
Pos logit contributions | |
answered | +0.150 |
shook | +0.136 |
spoke | +0.134 |
checked | +0.134 |
came | +0.133 |
Neg logit contributions | |
nam | -0.169 |
igsaw | -0.168 |
Amen | -0.162 |
gol | -0.161 |
rier | -0.161 |
Token | for |
Token ID | 329 |
Feature activation | +2.38e+01 |
Pos logit contributions | |
shook | +0.002 |
checked | +0.002 |
looked | +0.002 |
woke | +0.002 |
came | +0.002 |
Neg logit contributions | |
igsaw | -0.003 |
rou | -0.003 |
pill | -0.003 |
rier | -0.003 |
gol | -0.003 |
Token | being |
Token ID | 852 |
Feature activation | +1.68e+01 |
Pos logit contributions | |
and | +0.110 |
, | +0.104 |
said | +0.091 |
. | +0.090 |
came | +0.090 |
Neg logit contributions | |
nam | -0.088 |
igsaw | -0.085 |
ey | -0.080 |
redd | -0.073 |
microsc | -0.072 |
Token | a |
Token ID | 257 |
Feature activation | +7.40e+00 |
Pos logit contributions | |
and | +0.076 |
, | +0.070 |
woke | +0.063 |
said | +0.061 |
fell | +0.061 |
Neg logit contributions | |
nam | -0.061 |
igsaw | -0.060 |
redd | -0.056 |
ju | -0.051 |
ey | -0.051 |
Token | good |
Token ID | 922 |
Feature activation | +2.21e+01 |
Pos logit contributions | |
and | +0.037 |
, | +0.034 |
said | +0.031 |
came | +0.030 |
. | +0.030 |
Neg logit contributions | |
igsaw | -0.025 |
ey | -0.022 |
nam | -0.022 |
redd | -0.021 |
rou | -0.021 |
Token | friend |
Token ID | 1545 |
Feature activation | +3.11e+01 |
Pos logit contributions | |
and | +0.100 |
, | +0.095 |
said | +0.089 |
came | +0.085 |
got | +0.083 |
Neg logit contributions | |
igsaw | -0.079 |
ey | -0.073 |
gol | -0.070 |
redd | -0.070 |
umm | -0.066 |
Token | and |
Token ID | 290 |
Feature activation | +4.70e+01 |
Pos logit contributions | |
turned | +0.050 |
squeezed | +0.050 |
fell | +0.049 |
shook | +0.049 |
woke | +0.047 |
Neg logit contributions | |
igsaw | -0.156 |
pill | -0.153 |
rou | -0.149 |
ifying | -0.148 |
gol | -0.147 |
Token | sharing |
Token ID | 7373 |
Feature activation | +2.01e+01 |
Pos logit contributions | |
and | +0.170 |
, | +0.157 |
. | +0.128 |
said | +0.125 |
came | +0.124 |
Neg logit contributions | |
igsaw | -0.213 |
nam | -0.210 |
ey | -0.206 |
redd | -0.194 |
rou | -0.193 |
Token | her |
Token ID | 607 |
Feature activation | +5.91e+00 |
Pos logit contributions | |
and | +0.072 |
, | +0.070 |
fell | +0.069 |
woke | +0.067 |
got | +0.066 |
Neg logit contributions | |
nam | -0.080 |
igsaw | -0.080 |
ey | -0.076 |
reating | -0.072 |
microsc | -0.070 |
Token | treasures |
Token ID | 29561 |
Feature activation | +1.13e+01 |
Pos logit contributions | |
and | +0.030 |
said | +0.030 |
came | +0.030 |
, | +0.030 |
fell | +0.029 |
Neg logit contributions | |
igsaw | -0.016 |
ey | -0.015 |
nam | -0.013 |
redd | -0.013 |
microsc | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | +4.01e+01 |
Pos logit contributions | |
and | +0.022 |
, | +0.020 |
came | +0.019 |
said | +0.018 |
woke | +0.017 |
Neg logit contributions | |
igsaw | -0.070 |
nam | -0.064 |
ey | -0.062 |
redd | -0.061 |
rier | -0.061 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +3.72e+01 |
Pos logit contributions | |
and | +0.133 |
, | +0.122 |
. | +0.096 |
said | +0.094 |
came | +0.093 |
Neg logit contributions | |
igsaw | -0.212 |
ey | -0.204 |
nam | -0.198 |
saw | -0.193 |
redd | -0.191 |
Token | some |
Token ID | 617 |
Feature activation | +3.20e+01 |
Pos logit contributions | |
and | +0.179 |
, | +0.166 |
woke | +0.137 |
came | +0.135 |
. | +0.131 |
Neg logit contributions | |
nam | -0.194 |
igsaw | -0.175 |
redd | -0.170 |
ey | -0.161 |
rou | -0.160 |
Token | more |
Token ID | 517 |
Feature activation | +3.84e+01 |
Pos logit contributions | |
and | +0.143 |
, | +0.134 |
came | +0.133 |
woke | +0.127 |
fell | +0.125 |
Neg logit contributions | |
igsaw | -0.106 |
redd | -0.097 |
nam | -0.096 |
ju | -0.095 |
ey | -0.093 |
Token | ". |
Token ID | 1911 |
Feature activation | +7.68e+01 |
Pos logit contributions | |
and | +0.176 |
, | +0.166 |
came | +0.163 |
woke | +0.155 |
said | +0.152 |
Neg logit contributions | |
igsaw | -0.132 |
nam | -0.124 |
redd | -0.123 |
ey | -0.117 |
ju | -0.114 |
Token | So |
Token ID | 1406 |
Feature activation | +5.54e+01 |
Pos logit contributions | |
and | +0.330 |
, | +0.306 |
came | +0.274 |
woke | +0.262 |
got | +0.262 |
Neg logit contributions | |
igsaw | -0.307 |
ey | -0.270 |
saw | -0.263 |
nam | -0.263 |
pill | -0.262 |
Token | Amy |
Token ID | 14235 |
Feature activation | +2.55e+01 |
Pos logit contributions | |
and | +0.202 |
woke | +0.171 |
came | +0.170 |
checked | +0.166 |
, | +0.159 |
Neg logit contributions | |
igsaw | -0.227 |
nam | -0.204 |
redd | -0.188 |
Deal | -0.184 |
saw | -0.184 |
Token | put |
Token ID | 1234 |
Feature activation | +7.71e+01 |
Pos logit contributions | |
," | +0.045 |
, | +0.045 |
. | +0.044 |
wr | +0.040 |
ged | +0.039 |
Neg logit contributions | |
rou | -0.107 |
iam | -0.100 |
spe | -0.097 |
ju | -0.093 |
concession | -0.092 |
Token | on |
Token ID | 319 |
Feature activation | +5.68e+01 |
Pos logit contributions | |
and | +0.266 |
, | +0.251 |
woke | +0.221 |
came | +0.212 |
said | +0.210 |
Neg logit contributions | |
igsaw | -0.347 |
nam | -0.340 |
redd | -0.318 |
ey | -0.309 |
ju | -0.291 |
Token | her |
Token ID | 607 |
Feature activation | +3.50e+01 |
Pos logit contributions | |
and | +0.204 |
, | +0.189 |
said | +0.159 |
came | +0.158 |
. | +0.151 |
Neg logit contributions | |
igsaw | -0.276 |
nam | -0.260 |
ey | -0.257 |
redd | -0.247 |
saw | -0.240 |
Token | shoes |
Token ID | 10012 |
Feature activation | +3.83e+01 |
Pos logit contributions | |
and | +0.173 |
, | +0.161 |
came | +0.157 |
said | +0.155 |
fell | +0.148 |
Neg logit contributions | |
igsaw | -0.116 |
nam | -0.107 |
ey | -0.103 |
redd | -0.096 |
ju | -0.092 |
Token | and |
Token ID | 290 |
Feature activation | +7.81e+01 |
Pos logit contributions | |
roared | +0.036 |
woke | +0.033 |
came | +0.030 |
listened | +0.029 |
answered | +0.028 |
Neg logit contributions | |
igsaw | -0.238 |
nam | -0.236 |
ifying | -0.218 |
redd | -0.213 |
rou | -0.213 |
Token | happily |
Token ID | 18177 |
Feature activation | +5.28e+01 |
Pos logit contributions | |
and | +0.157 |
, | +0.146 |
. | +0.121 |
! | +0.100 |
owed | +0.091 |
Neg logit contributions | |
nam | -0.458 |
igsaw | -0.414 |
redd | -0.409 |
concession | -0.395 |
ju | -0.394 |
Token | anything |
Token ID | 1997 |
Feature activation | -3.91e+01 |
Pos logit contributions | |
nam | +0.063 |
igsaw | +0.058 |
possible | +0.055 |
ocol | +0.053 |
gol | +0.053 |
Neg logit contributions | |
and | -0.075 |
, | -0.070 |
woke | -0.066 |
said | -0.063 |
came | -0.062 |
Token | ." |
Token ID | 526 |
Feature activation | -1.32e+01 |
Pos logit contributions | |
possible | +0.150 |
rewarding | +0.136 |
pill | +0.136 |
fastest | +0.132 |
rier | +0.129 |
Neg logit contributions | |
owed | -0.119 |
searched | -0.118 |
walked | -0.117 |
woke | -0.115 |
and | -0.114 |
Token |
|
Token ID | 198 |
Feature activation | -4.59e+01 |
Pos logit contributions | |
igsaw | +0.037 |
pill | +0.036 |
gol | +0.033 |
saw | +0.033 |
Amen | +0.032 |
Neg logit contributions | |
and | -0.059 |
, | -0.055 |
came | -0.051 |
got | -0.051 |
heard | -0.051 |
Token |
|
Token ID | 198 |
Feature activation | -4.50e+01 |
Pos logit contributions | |
igsaw | +0.208 |
ey | +0.196 |
saw | +0.188 |
nam | +0.186 |
pill | +0.185 |
Neg logit contributions | |
and | -0.179 |
, | -0.168 |
came | -0.138 |
. | -0.136 |
said | -0.134 |
Token | From |
Token ID | 4863 |
Feature activation | -2.33e+01 |
Pos logit contributions | |
igsaw | +0.133 |
saw | +0.119 |
ey | +0.117 |
pill | +0.111 |
nam | +0.110 |
Neg logit contributions | |
and | -0.246 |
, | -0.238 |
came | -0.207 |
. | -0.206 |
said | -0.202 |
Token | that |
Token ID | 326 |
Feature activation | -2.54e+01 |
Pos logit contributions | |
igsaw | +0.124 |
ey | +0.118 |
nam | +0.115 |
saw | +0.113 |
iqu | +0.110 |
Neg logit contributions | |
and | -0.070 |
, | -0.064 |
came | -0.053 |
. | -0.050 |
got | -0.048 |
Token | day |
Token ID | 1110 |
Feature activation | -2.61e+01 |
Pos logit contributions | |
igsaw | +0.071 |
Wed | +0.055 |
gol | +0.053 |
narrative | +0.053 |
Amen | +0.051 |
Neg logit contributions | |
and | -0.126 |
owed | -0.121 |
came | -0.118 |
, | -0.116 |
entered | -0.116 |
Token | on |
Token ID | 319 |
Feature activation | -2.35e+01 |
Pos logit contributions | |
igsaw | +0.104 |
ifying | +0.102 |
Deal | +0.098 |
nam | +0.098 |
narrative | +0.097 |
Neg logit contributions | |
and | -0.085 |
said | -0.073 |
whispered | -0.070 |
answered | -0.069 |
thought | -0.069 |
Token | , |
Token ID | 11 |
Feature activation | -1.04e+01 |
Pos logit contributions | |
igsaw | +0.162 |
Wed | +0.146 |
ey | +0.145 |
nam | +0.139 |
ocol | +0.139 |
Neg logit contributions | |
and | -0.024 |
whispered | -0.014 |
said | -0.012 |
woke | -0.010 |
came | -0.010 |
Token | Fin |
Token ID | 4463 |
Feature activation | -4.45e+01 |
Pos logit contributions | |
igsaw | +0.040 |
nam | +0.038 |
redd | +0.036 |
saw | +0.035 |
pill | +0.035 |
Neg logit contributions | |
and | -0.042 |
, | -0.038 |
woke | -0.036 |
came | -0.035 |
whispered | -0.035 |
Token | learned |
Token ID | 4499 |
Feature activation | -7.44e-01 |
Pos logit contributions | |
rou | +0.218 |
gol | +0.212 |
ini | +0.209 |
pill | +0.208 |
amental | +0.194 |
Neg logit contributions | |
answered | -0.066 |
owed | -0.055 |
," | -0.052 |
kissed | -0.052 |
whispered | -0.050 |
Token | wearing |
Token ID | 5762 |
Feature activation | +4.55e+01 |
Pos logit contributions | |
and | +0.173 |
, | +0.162 |
came | +0.137 |
woke | +0.136 |
. | +0.135 |
Neg logit contributions | |
nam | -0.162 |
igsaw | -0.159 |
ey | -0.152 |
redd | -0.149 |
saw | -0.149 |
Token | a |
Token ID | 257 |
Feature activation | +2.38e+01 |
Pos logit contributions | |
and | +0.154 |
, | +0.141 |
. | +0.112 |
said | +0.110 |
came | +0.108 |
Neg logit contributions | |
igsaw | -0.239 |
ey | -0.229 |
nam | -0.220 |
saw | -0.217 |
pill | -0.215 |
Token | helmet |
Token ID | 14335 |
Feature activation | +1.71e+01 |
Pos logit contributions | |
and | +0.109 |
, | +0.102 |
. | +0.086 |
said | +0.085 |
came | +0.083 |
Neg logit contributions | |
igsaw | -0.099 |
ey | -0.093 |
pill | -0.091 |
nam | -0.090 |
saw | -0.089 |
Token | and |
Token ID | 290 |
Feature activation | +6.28e+01 |
Pos logit contributions | |
woke | +0.024 |
checked | +0.023 |
came | +0.022 |
squeezed | +0.022 |
entered | +0.021 |
Neg logit contributions | |
rier | -0.087 |
ifying | -0.083 |
gol | -0.083 |
nam | -0.083 |
rou | -0.081 |
Token | a |
Token ID | 257 |
Feature activation | +2.52e+01 |
Pos logit contributions | |
and | +0.234 |
, | +0.220 |
came | +0.184 |
. | +0.181 |
said | +0.174 |
Neg logit contributions | |
igsaw | -0.258 |
ey | -0.257 |
nam | -0.256 |
saw | -0.240 |
pill | -0.240 |
Token | backpack |
Token ID | 22526 |
Feature activation | +3.12e+01 |
Pos logit contributions | |
and | +0.111 |
, | +0.104 |
. | +0.088 |
said | +0.087 |
came | +0.087 |
Neg logit contributions | |
igsaw | -0.105 |
ey | -0.099 |
nam | -0.095 |
saw | -0.092 |
redd | -0.091 |
Token | . |
Token ID | 13 |
Feature activation | +5.52e+01 |
Pos logit contributions | |
woke | +0.061 |
squeezed | +0.055 |
listened | +0.054 |
argued | +0.052 |
checked | +0.051 |
Neg logit contributions | |
saw | -0.136 |
rier | -0.135 |
rou | -0.135 |
reating | -0.130 |
possible | -0.130 |
Token | He |
Token ID | 679 |
Feature activation | +3.29e+01 |
Pos logit contributions | |
and | +0.233 |
, | +0.223 |
woke | +0.199 |
came | +0.197 |
got | +0.192 |
Neg logit contributions | |
igsaw | -0.207 |
saw | -0.195 |
ey | -0.188 |
pill | -0.181 |
redd | -0.180 |
Token | is |
Token ID | 318 |
Feature activation | +4.53e+01 |
Pos logit contributions | |
, | +0.118 |
and | +0.115 |
owed | +0.102 |
. | +0.101 |
came | +0.099 |
Neg logit contributions | |
pill | -0.130 |
ey | -0.127 |
saw | -0.118 |
rou | -0.114 |
concession | -0.114 |
Token | fixing |
Token ID | 18682 |
Feature activation | +4.84e+01 |
Pos logit contributions | |
and | +0.188 |
, | +0.175 |
woke | +0.147 |
came | +0.146 |
. | +0.144 |
Neg logit contributions | |
nam | -0.184 |
igsaw | -0.177 |
ey | -0.168 |
saw | -0.168 |
redd | -0.167 |
Token | his |
Token ID | 465 |
Feature activation | +2.06e+01 |
Pos logit contributions | |
and | +0.173 |
, | +0.161 |
came | +0.149 |
woke | +0.143 |
said | +0.141 |
Neg logit contributions | |
igsaw | -0.216 |
nam | -0.213 |
ey | -0.205 |
redd | -0.193 |
saw | -0.193 |
Token | it |
Token ID | 340 |
Feature activation | +5.16e+00 |
Pos logit contributions | |
and | +0.033 |
, | +0.030 |
. | +0.024 |
said | +0.023 |
came | +0.023 |
Neg logit contributions | |
igsaw | -0.052 |
ey | -0.050 |
nam | -0.049 |
saw | -0.047 |
pill | -0.047 |
Token | shine |
Token ID | 18340 |
Feature activation | +7.91e+00 |
Pos logit contributions | |
and | +0.024 |
, | +0.022 |
came | +0.021 |
said | +0.021 |
answered | +0.020 |
Neg logit contributions | |
igsaw | -0.017 |
nam | -0.016 |
redd | -0.016 |
ju | -0.015 |
outer | -0.015 |
Token | again |
Token ID | 757 |
Feature activation | +8.14e+00 |
Pos logit contributions | |
and | +0.024 |
, | +0.022 |
came | +0.021 |
said | +0.021 |
answered | +0.021 |
Neg logit contributions | |
igsaw | -0.037 |
rier | -0.036 |
redd | -0.036 |
nam | -0.036 |
outer | -0.034 |
Token | !" |
Token ID | 2474 |
Feature activation | +2.07e+01 |
Pos logit contributions | |
came | +0.026 |
checked | +0.023 |
answered | +0.023 |
shook | +0.023 |
hugged | +0.022 |
Neg logit contributions | |
nam | -0.034 |
igsaw | -0.034 |
outer | -0.033 |
redd | -0.032 |
ju | -0.032 |
Token |
|
Token ID | 198 |
Feature activation | -1.21e+01 |
Pos logit contributions | |
remembered | +0.089 |
returned | +0.087 |
kept | +0.086 |
fell | +0.085 |
roared | +0.084 |
Neg logit contributions | |
Wed | -0.046 |
igsaw | -0.044 |
pill | -0.041 |
Sad | -0.039 |
Oh | -0.035 |
Token |
|
Token ID | 198 |
Feature activation | -1.15e+01 |
Pos logit contributions | |
igsaw | +0.053 |
ey | +0.049 |
saw | +0.049 |
pill | +0.047 |
nam | +0.046 |
Neg logit contributions | |
and | -0.044 |
, | -0.041 |
came | -0.035 |
gave | -0.034 |
got | -0.034 |
Token | The |
Token ID | 464 |
Feature activation | -3.35e-01 |
Pos logit contributions | |
saw | +0.025 |
igsaw | +0.025 |
nam | +0.024 |
E | +0.023 |
Sad | +0.022 |
Neg logit contributions | |
and | -0.064 |
, | -0.064 |
came | -0.059 |
answered | -0.057 |
fell | -0.056 |
Token | daughter |
Token ID | 4957 |
Feature activation | +6.53e-01 |
Pos logit contributions | |
igsaw | +0.001 |
ey | +0.001 |
nam | +0.001 |
saw | +0.001 |
pill | +0.001 |
Neg logit contributions | |
and | -0.001 |
, | -0.001 |
. | -0.001 |
said | -0.001 |
came | -0.001 |
Token | was |
Token ID | 373 |
Feature activation | +7.39e+00 |
Pos logit contributions | |
rs | +0.001 |
wr | +0.001 |
died | +0.001 |
use | +0.001 |
hatch | +0.001 |
Neg logit contributions | |
rou | -0.002 |
ott | -0.002 |
gol | -0.002 |
addy | -0.002 |
ini | -0.002 |
Token | excited |
Token ID | 6568 |
Feature activation | +3.23e+00 |
Pos logit contributions | |
and | +0.030 |
, | +0.028 |
came | +0.023 |
. | +0.023 |
said | +0.023 |
Neg logit contributions | |
igsaw | -0.031 |
nam | -0.030 |
ey | -0.029 |
saw | -0.028 |
redd | -0.028 |
Token | to |
Token ID | 284 |
Feature activation | +1.11e+01 |
Pos logit contributions | |
checked | +0.004 |
fell | +0.004 |
held | +0.003 |
woke | +0.003 |
squeezed | +0.003 |
Neg logit contributions | |
igsaw | -0.019 |
reating | -0.018 |
ey | -0.017 |
Wed | -0.017 |
rou | -0.017 |
Token | some |
Token ID | 617 |
Feature activation | -1.13e+01 |
Pos logit contributions | |
igsaw | +0.025 |
ey | +0.023 |
nam | +0.023 |
redd | +0.022 |
pill | +0.022 |
Neg logit contributions | |
and | -0.017 |
, | -0.015 |
. | -0.012 |
said | -0.012 |
came | -0.012 |
Token | cheese |
Token ID | 9891 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
igsaw | +0.031 |
ey | +0.030 |
redd | +0.028 |
ocol | +0.028 |
ju | +0.026 |
Neg logit contributions | |
and | -0.052 |
came | -0.050 |
woke | -0.049 |
ran | -0.048 |
looked | -0.047 |
Token | , |
Token ID | 11 |
Feature activation | +2.43e+01 |
Pos logit contributions | |
igsaw | +0.002 |
ju | +0.002 |
rier | +0.002 |
arl | +0.002 |
redd | +0.002 |
Neg logit contributions | |
rushed | -0.001 |
ran | -0.001 |
stepped | -0.001 |
walked | -0.001 |
watched | -0.001 |
Token | please |
Token ID | 3387 |
Feature activation | -1.05e+01 |
Pos logit contributions | |
and | +0.099 |
, | +0.095 |
came | +0.083 |
. | +0.083 |
said | +0.081 |
Neg logit contributions | |
ey | -0.090 |
igsaw | -0.090 |
nam | -0.086 |
saw | -0.085 |
redd | -0.084 |
Token | ?" |
Token ID | 1701 |
Feature activation | +1.81e+01 |
Pos logit contributions | |
Amen | +0.037 |
Deal | +0.031 |
igsaw | +0.030 |
nam | +0.030 |
Isa | +0.029 |
Neg logit contributions | |
and | -0.040 |
hed | -0.040 |
felt | -0.036 |
ed | -0.036 |
argued | -0.035 |
Token | Sam |
Token ID | 3409 |
Feature activation | -1.71e+00 |
Pos logit contributions | |
and | +0.068 |
, | +0.063 |
came | +0.052 |
got | +0.051 |
. | +0.051 |
Neg logit contributions | |
igsaw | -0.077 |
ey | -0.074 |
saw | -0.072 |
pill | -0.068 |
nam | -0.068 |
Token | said |
Token ID | 531 |
Feature activation | +3.49e+01 |
Pos logit contributions | |
ju | +0.004 |
redd | +0.004 |
Me | +0.004 |
Isa | +0.004 |
nic | +0.004 |
Neg logit contributions | |
oor | -0.006 |
ides | -0.006 |
oves | -0.006 |
eness | -0.006 |
ring | -0.006 |
Token | , |
Token ID | 11 |
Feature activation | +2.71e+01 |
Pos logit contributions | |
and | +0.067 |
whispered | +0.056 |
visited | +0.050 |
checked | +0.048 |
hed | +0.047 |
Neg logit contributions | |
rier | -0.173 |
nam | -0.167 |
downward | -0.165 |
redd | -0.162 |
brav | -0.152 |
Token | " |
Token ID | 366 |
Feature activation | +2.24e+01 |
Pos logit contributions | |
owed | +0.119 |
le | +0.118 |
imagined | +0.118 |
pt | +0.118 |
ered | +0.116 |
Neg logit contributions | |
downward | -0.059 |
Acc | -0.057 |
nam | -0.056 |
rier | -0.052 |
redd | -0.049 |
Token | No |
Token ID | 2949 |
Feature activation | +2.33e+01 |
Pos logit contributions | |
and | +0.118 |
, | +0.111 |
. | +0.097 |
said | +0.096 |
came | +0.096 |
Neg logit contributions | |
igsaw | -0.075 |
ey | -0.070 |
nam | -0.066 |
saw | -0.064 |
redd | -0.063 |
Token | , |
Token ID | 11 |
Feature activation | +2.87e+01 |
Pos logit contributions | |
and | +0.020 |
gave | +0.003 |
went | +0.003 |
came | +0.002 |
, | +0.001 |
Neg logit contributions | |
igsaw | -0.164 |
ey | -0.155 |
microsc | -0.150 |
pill | -0.148 |
saw | -0.147 |
Token | something |
Token ID | 1223 |
Feature activation | +1.93e+01 |
Pos logit contributions | |
woke | +0.080 |
fell | +0.078 |
said | +0.077 |
and | +0.076 |
came | +0.076 |
Neg logit contributions | |
ey | -0.085 |
igsaw | -0.084 |
nam | -0.079 |
redd | -0.077 |
ju | -0.074 |
Token | different |
Token ID | 1180 |
Feature activation | +8.40e+00 |
Pos logit contributions | |
woke | +0.086 |
came | +0.083 |
and | +0.081 |
walked | +0.079 |
listened | +0.078 |
Neg logit contributions | |
igsaw | -0.061 |
rier | -0.059 |
nam | -0.058 |
redd | -0.057 |
ju | -0.056 |
Token | in |
Token ID | 287 |
Feature activation | +1.93e+01 |
Pos logit contributions | |
fell | +0.023 |
woke | +0.022 |
cried | +0.020 |
said | +0.020 |
came | +0.020 |
Neg logit contributions | |
igsaw | -0.040 |
spec | -0.040 |
nam | -0.039 |
ey | -0.039 |
microsc | -0.038 |
Token | her |
Token ID | 607 |
Feature activation | +5.91e+00 |
Pos logit contributions | |
and | +0.068 |
, | +0.063 |
said | +0.053 |
came | +0.052 |
. | +0.050 |
Neg logit contributions | |
igsaw | -0.096 |
ey | -0.091 |
nam | -0.090 |
redd | -0.084 |
outer | -0.083 |
Token | old |
Token ID | 1468 |
Feature activation | -1.53e+01 |
Pos logit contributions | |
and | +0.031 |
said | +0.031 |
told | +0.029 |
gave | +0.029 |
came | +0.029 |
Neg logit contributions | |
ey | -0.017 |
igsaw | -0.016 |
ocol | -0.011 |
redd | -0.011 |
microsc | -0.011 |
Token | yellow |
Token ID | 7872 |
Feature activation | -1.09e+01 |
Pos logit contributions | |
ey | +0.044 |
igsaw | +0.040 |
microsc | +0.037 |
dining | +0.036 |
redd | +0.035 |
Neg logit contributions | |
owed | -0.070 |
said | -0.067 |
fell | -0.067 |
listened | -0.067 |
and | -0.067 |
Token | bowl |
Token ID | 9396 |
Feature activation | +1.54e+01 |
Pos logit contributions | |
ey | +0.042 |
igsaw | +0.040 |
ocol | +0.035 |
umm | +0.032 |
microsc | +0.032 |
Neg logit contributions | |
said | -0.046 |
told | -0.045 |
listened | -0.045 |
cried | -0.044 |
fell | -0.043 |
Token | . |
Token ID | 13 |
Feature activation | +4.10e+01 |
Pos logit contributions | |
woke | +0.030 |
listened | +0.028 |
checked | +0.028 |
rang | +0.028 |
came | +0.027 |
Neg logit contributions | |
igsaw | -0.085 |
umm | -0.079 |
ifying | -0.077 |
ju | -0.076 |
rier | -0.076 |
Token | From |
Token ID | 3574 |
Feature activation | +3.45e+01 |
Pos logit contributions | |
and | +0.178 |
, | +0.168 |
said | +0.143 |
came | +0.142 |
. | +0.140 |
Neg logit contributions | |
igsaw | -0.173 |
ey | -0.161 |
pill | -0.154 |
nam | -0.151 |
saw | -0.150 |
Token | castles |
Token ID | 49203 |
Feature activation | -1.87e+00 |
Pos logit contributions | |
and | +0.117 |
came | +0.108 |
woke | +0.106 |
, | +0.105 |
got | +0.098 |
Neg logit contributions | |
igsaw | -0.146 |
nam | -0.142 |
ey | -0.138 |
rou | -0.137 |
outer | -0.135 |
Token | and |
Token ID | 290 |
Feature activation | +5.16e+01 |
Pos logit contributions | |
igsaw | +0.009 |
reating | +0.009 |
Wed | +0.009 |
rier | +0.009 |
ey | +0.008 |
Neg logit contributions | |
whispered | -0.003 |
cried | -0.003 |
lied | -0.003 |
squeezed | -0.003 |
complained | -0.002 |
Token | sky |
Token ID | 6766 |
Feature activation | -6.57e+01 |
Pos logit contributions | |
igsaw | +0.188 |
nam | +0.185 |
saw | +0.180 |
ey | +0.178 |
redd | +0.177 |
Neg logit contributions | |
and | -0.281 |
, | -0.270 |
came | -0.239 |
said | -0.236 |
. | -0.232 |
Token | was |
Token ID | 373 |
Feature activation | -5.99e+01 |
Pos logit contributions | |
spec | +0.194 |
spe | +0.186 |
vast | +0.168 |
saw | +0.162 |
darker | +0.161 |
Neg logit contributions | |
anas | -0.225 |
ides | -0.216 |
irus | -0.211 |
rums | -0.210 |
", | -0.210 |
Token | blue |
Token ID | 4171 |
Feature activation | -9.93e+01 |
Pos logit contributions | |
igsaw | +0.195 |
ey | +0.189 |
nam | +0.187 |
redd | +0.184 |
saw | +0.178 |
Neg logit contributions | |
and | -0.282 |
, | -0.269 |
. | -0.237 |
came | -0.237 |
said | -0.237 |
Token | and |
Token ID | 290 |
Feature activation | -3.61e+01 |
Pos logit contributions | |
ey | +0.663 |
igsaw | +0.652 |
spec | +0.645 |
saw | +0.633 |
umm | +0.631 |
Neg logit contributions | |
said | -0.034 |
told | -0.033 |
promised | -0.024 |
came | -0.023 |
woke | -0.021 |
Token | the |
Token ID | 262 |
Feature activation | -6.63e+01 |
Pos logit contributions | |
igsaw | +0.160 |
ey | +0.153 |
nam | +0.153 |
redd | +0.146 |
saw | +0.146 |
Neg logit contributions | |
and | -0.142 |
, | -0.132 |
. | -0.110 |
said | -0.108 |
came | -0.107 |
Token | wind |
Token ID | 2344 |
Feature activation | -7.50e+01 |
Pos logit contributions | |
nam | +0.206 |
redd | +0.194 |
ey | +0.184 |
spec | +0.182 |
saw | +0.180 |
Neg logit contributions | |
and | -0.325 |
, | -0.315 |
owed | -0.285 |
came | -0.283 |
answered | -0.277 |
Token | was |
Token ID | 373 |
Feature activation | -5.32e+01 |
Pos logit contributions | |
spec | +0.269 |
spe | +0.240 |
ifying | +0.239 |
ini | +0.234 |
saw | +0.233 |
Neg logit contributions | |
agree | -0.208 |
anas | -0.194 |
, | -0.188 |
So | -0.185 |
searched | -0.180 |
Token | strong |
Token ID | 1913 |
Feature activation | -7.93e+01 |
Pos logit contributions | |
nam | +0.144 |
redd | +0.136 |
outer | +0.124 |
rou | +0.124 |
ju | +0.121 |
Neg logit contributions | |
and | -0.244 |
, | -0.238 |
woke | -0.229 |
owed | -0.222 |
came | -0.215 |
Token | . |
Token ID | 13 |
Feature activation | -3.87e+01 |
Pos logit contributions | |
rier | +0.410 |
spec | +0.408 |
igsaw | +0.394 |
possible | +0.387 |
redd | +0.386 |
Neg logit contributions | |
answered | -0.135 |
owed | -0.134 |
woke | -0.132 |
listened | -0.121 |
hugged | -0.120 |
Token | They |
Token ID | 1119 |
Feature activation | -4.30e+01 |
Pos logit contributions | |
igsaw | +0.163 |
saw | +0.145 |
ey | +0.145 |
pill | +0.145 |
nam | +0.144 |
Neg logit contributions | |
and | -0.160 |
, | -0.152 |
came | -0.129 |
. | -0.126 |
said | -0.125 |
Token | ran |
Token ID | 4966 |
Feature activation | -2.65e+01 |
Pos logit contributions | |
nam | +0.223 |
chard | +0.165 |
redd | +0.159 |
saw | +0.149 |
ju | +0.147 |
Neg logit contributions | |
- | -0.123 |
; | -0.123 |
, | -0.118 |
– | -0.118 |
— | -0.117 |
Token | day |
Token ID | 1110 |
Feature activation | -9.45e+01 |
Pos logit contributions | |
igsaw | +0.327 |
ey | +0.314 |
redd | +0.308 |
saw | +0.297 |
Wed | +0.288 |
Neg logit contributions | |
and | -0.515 |
, | -0.464 |
came | -0.448 |
hugged | -0.434 |
gave | -0.432 |
Token | she |
Token ID | 673 |
Feature activation | -9.21e+01 |
Pos logit contributions | |
Wed | +0.538 |
igsaw | +0.514 |
ocol | +0.494 |
nam | +0.487 |
spec | +0.486 |
Neg logit contributions | |
and | -0.136 |
promised | -0.100 |
agreed | -0.097 |
whispered | -0.097 |
owed | -0.095 |
Token | wanted |
Token ID | 2227 |
Feature activation | -6.13e+01 |
Pos logit contributions | |
saw | +0.414 |
Isa | +0.394 |
concession | +0.394 |
pill | +0.392 |
spec | +0.389 |
Neg logit contributions | |
and | -0.205 |
, | -0.188 |
red | -0.167 |
because | -0.162 |
owed | -0.152 |
Token | to |
Token ID | 284 |
Feature activation | -7.86e+01 |
Pos logit contributions | |
nam | +0.204 |
microsc | +0.200 |
Wed | +0.197 |
possible | +0.190 |
outer | +0.189 |
Neg logit contributions | |
hed | -0.228 |
and | -0.219 |
owed | -0.216 |
agreed | -0.204 |
woke | -0.200 |
Token | play |
Token ID | 711 |
Feature activation | -6.60e+01 |
Pos logit contributions | |
igsaw | +0.307 |
ey | +0.277 |
outer | +0.272 |
saw | +0.268 |
Wed | +0.268 |
Neg logit contributions | |
and | -0.314 |
, | -0.272 |
ed | -0.270 |
came | -0.260 |
red | -0.254 |
Token | with |
Token ID | 351 |
Feature activation | -6.68e+01 |
Pos logit contributions | |
igsaw | +0.278 |
outer | +0.260 |
nam | +0.255 |
ey | +0.252 |
Wed | +0.245 |
Neg logit contributions | |
answered | -0.191 |
promised | -0.190 |
woke | -0.190 |
and | -0.190 |
said | -0.187 |
Token | her |
Token ID | 607 |
Feature activation | -9.51e+01 |
Pos logit contributions | |
igsaw | +0.266 |
nam | +0.263 |
ey | +0.247 |
outer | +0.241 |
Wed | +0.236 |
Neg logit contributions | |
and | -0.272 |
, | -0.250 |
came | -0.230 |
woke | -0.229 |
said | -0.226 |
Token | ball |
Token ID | 2613 |
Feature activation | -6.97e+01 |
Pos logit contributions | |
igsaw | +0.312 |
nam | +0.310 |
ey | +0.290 |
microsc | +0.272 |
pill | +0.262 |
Neg logit contributions | |
and | -0.431 |
, | -0.406 |
came | -0.400 |
woke | -0.396 |
got | -0.386 |
Token | . |
Token ID | 13 |
Feature activation | -5.78e+01 |
Pos logit contributions | |
igsaw | +0.357 |
nam | +0.352 |
reating | +0.323 |
redd | +0.322 |
downward | +0.321 |
Neg logit contributions | |
listened | -0.125 |
woke | -0.123 |
answered | -0.112 |
watched | -0.109 |
checked | -0.108 |
Token | She |
Token ID | 1375 |
Feature activation | -6.81e+01 |
Pos logit contributions | |
igsaw | +0.199 |
pill | +0.179 |
nam | +0.179 |
ey | +0.179 |
saw | +0.171 |
Neg logit contributions | |
and | -0.265 |
, | -0.254 |
came | -0.230 |
listened | -0.223 |
said | -0.221 |
Token | ran |
Token ID | 4966 |
Feature activation | -4.44e+01 |
Pos logit contributions | |
nam | +0.312 |
redd | +0.300 |
Isa | +0.296 |
iam | +0.295 |
saw | +0.294 |
Neg logit contributions | |
, | -0.171 |
and | -0.162 |
. | -0.133 |
! | -0.131 |
because | -0.125 |
Token | picture |
Token ID | 4286 |
Feature activation | -5.89e+01 |
Pos logit contributions | |
igsaw | +0.353 |
nam | +0.333 |
ey | +0.331 |
microsc | +0.304 |
saw | +0.297 |
Neg logit contributions | |
and | -0.339 |
, | -0.324 |
came | -0.294 |
got | -0.275 |
woke | -0.273 |
Token | to |
Token ID | 284 |
Feature activation | -5.89e+01 |
Pos logit contributions | |
reating | +0.205 |
math | +0.189 |
igsaw | +0.185 |
possible | +0.181 |
saw | +0.181 |
Neg logit contributions | |
ged | -0.175 |
anas | -0.172 |
argued | -0.163 |
checked | -0.160 |
fought | -0.158 |
Token | Miss |
Token ID | 4544 |
Feature activation | -7.54e+01 |
Pos logit contributions | |
nam | +0.224 |
igsaw | +0.219 |
ey | +0.216 |
outer | +0.202 |
reating | +0.198 |
Neg logit contributions | |
and | -0.234 |
, | -0.222 |
came | -0.201 |
got | -0.195 |
fell | -0.193 |
Token | Lee |
Token ID | 5741 |
Feature activation | -7.36e+01 |
Pos logit contributions | |
igsaw | +0.286 |
ey | +0.262 |
saw | +0.260 |
nam | +0.255 |
pill | +0.252 |
Neg logit contributions | |
and | -0.321 |
, | -0.299 |
fell | -0.268 |
said | -0.267 |
got | -0.264 |
Token | and |
Token ID | 290 |
Feature activation | -3.61e+01 |
Pos logit contributions | |
reating | +0.338 |
rou | +0.337 |
igsaw | +0.331 |
nam | +0.329 |
microsc | +0.324 |
Neg logit contributions | |
fell | -0.161 |
squeezed | -0.158 |
kept | -0.145 |
woke | -0.143 |
checked | -0.140 |
Token | her |
Token ID | 607 |
Feature activation | -7.69e+01 |
Pos logit contributions | |
nam | +0.164 |
ey | +0.164 |
igsaw | +0.163 |
saw | +0.149 |
pill | +0.149 |
Neg logit contributions | |
and | -0.120 |
, | -0.111 |
came | -0.090 |
. | -0.090 |
got | -0.084 |
Token | friends |
Token ID | 2460 |
Feature activation | -5.06e+01 |
Pos logit contributions | |
igsaw | +0.251 |
ey | +0.250 |
nam | +0.235 |
rou | +0.215 |
gol | +0.207 |
Neg logit contributions | |
came | -0.324 |
and | -0.323 |
got | -0.319 |
woke | -0.316 |
, | -0.313 |
Token | . |
Token ID | 13 |
Feature activation | -4.12e+01 |
Pos logit contributions | |
nam | +0.219 |
rou | +0.208 |
reating | +0.199 |
ifying | +0.195 |
saw | +0.192 |
Neg logit contributions | |
fell | -0.091 |
and | -0.090 |
squeezed | -0.089 |
woke | -0.089 |
checked | -0.088 |
Token | They |
Token ID | 1119 |
Feature activation | -4.54e+01 |
Pos logit contributions | |
igsaw | +0.170 |
ey | +0.160 |
nam | +0.157 |
saw | +0.153 |
pill | +0.149 |
Neg logit contributions | |
and | -0.171 |
, | -0.159 |
came | -0.137 |
got | -0.135 |
said | -0.132 |
Token | say |
Token ID | 910 |
Feature activation | -4.39e+01 |
Pos logit contributions | |
nam | +0.185 |
ey | +0.175 |
igsaw | +0.168 |
saw | +0.166 |
narrative | +0.158 |
Neg logit contributions | |
and | -0.165 |
, | -0.158 |
owed | -0.135 |
came | -0.133 |
ed | -0.132 |
Token | it |
Token ID | 340 |
Feature activation | -4.71e+01 |
Pos logit contributions | |
igsaw | +0.193 |
nam | +0.189 |
microsc | +0.180 |
gol | +0.180 |
reating | +0.179 |
Neg logit contributions | |
and | -0.125 |
searched | -0.107 |
squeezed | -0.106 |
owed | -0.105 |
checked | -0.104 |
Token | . |
Token ID | 13 |
Feature activation | -5.43e+01 |
Pos logit contributions | |
nam | +0.312 |
outer | +0.304 |
igsaw | +0.300 |
microsc | +0.294 |
saw | +0.286 |
Neg logit contributions | |
and | -0.298 |
said | -0.278 |
fell | -0.277 |
woke | -0.276 |
came | -0.274 |
Token |
|
Token ID | 198 |
Feature activation | -8.18e+01 |
Pos logit contributions | |
igsaw | +0.263 |
ey | +0.251 |
nam | +0.243 |
saw | +0.236 |
redd | +0.234 |
Neg logit contributions | |
and | -0.205 |
, | -0.189 |
. | -0.154 |
said | -0.153 |
came | -0.151 |
Token |
|
Token ID | 198 |
Feature activation | -8.01e+01 |
Pos logit contributions | |
igsaw | +0.511 |
nam | +0.499 |
ey | +0.494 |
umm | +0.488 |
redd | +0.475 |
Neg logit contributions | |
and | -0.232 |
, | -0.201 |
. | -0.166 |
said | -0.151 |
came | -0.135 |
Token | They |
Token ID | 2990 |
Feature activation | -6.98e+01 |
Pos logit contributions | |
igsaw | +0.442 |
ey | +0.432 |
nam | +0.419 |
umm | +0.413 |
iqu | +0.404 |
Neg logit contributions | |
and | -0.276 |
, | -0.246 |
. | -0.207 |
said | -0.197 |
came | -0.181 |
Token | climbed |
Token ID | 19952 |
Feature activation | -4.33e+01 |
Pos logit contributions | |
nam | +0.354 |
redd | +0.295 |
microsc | +0.292 |
ey | +0.288 |
Isa | +0.280 |
Neg logit contributions | |
and | -0.186 |
, | -0.167 |
! | -0.147 |
!" | -0.136 |
. | -0.133 |
Token | over |
Token ID | 625 |
Feature activation | -6.41e+01 |
Pos logit contributions | |
igsaw | +0.255 |
ey | +0.244 |
nam | +0.236 |
pill | +0.229 |
saw | +0.229 |
Neg logit contributions | |
and | -0.128 |
, | -0.115 |
. | -0.087 |
said | -0.082 |
came | -0.082 |
Token | the |
Token ID | 262 |
Feature activation | -6.77e+01 |
Pos logit contributions | |
igsaw | +0.275 |
nam | +0.262 |
ey | +0.258 |
saw | +0.258 |
outer | +0.258 |
Neg logit contributions | |
and | -0.198 |
, | -0.194 |
gave | -0.186 |
said | -0.186 |
woke | -0.183 |
Token | fence |
Token ID | 13990 |
Feature activation | -4.86e+01 |
Pos logit contributions | |
igsaw | +0.273 |
ey | +0.262 |
nam | +0.248 |
saw | +0.242 |
redd | +0.236 |
Neg logit contributions | |
and | -0.302 |
, | -0.284 |
said | -0.247 |
came | -0.246 |
. | -0.241 |
Token | and |
Token ID | 290 |
Feature activation | -3.33e+01 |
Pos logit contributions | |
saw | +0.275 |
igsaw | +0.268 |
spec | +0.255 |
downward | +0.253 |
ey | +0.251 |
Neg logit contributions | |
woke | -0.060 |
checked | -0.049 |
told | -0.047 |
listened | -0.047 |
remembered | -0.044 |
Token | went |
Token ID | 1816 |
Feature activation | -3.37e+01 |
Pos logit contributions | |
nam | +0.197 |
saw | +0.178 |
ey | +0.172 |
redd | +0.170 |
spec | +0.165 |
Neg logit contributions | |
and | -0.074 |
, | -0.066 |
. | -0.050 |
! | -0.047 |
- | -0.044 |
Token | to |
Token ID | 284 |
Feature activation | -4.94e+01 |
Pos logit contributions | |
igsaw | +0.200 |
ey | +0.192 |
nam | +0.187 |
saw | +0.185 |
redd | +0.181 |
Neg logit contributions | |
and | -0.086 |
, | -0.076 |
said | -0.056 |
came | -0.055 |
. | -0.054 |
Token | towers |
Token ID | 18028 |
Feature activation | -1.06e+02 |
Pos logit contributions | |
ey | +0.277 |
igsaw | +0.271 |
saw | +0.251 |
pill | +0.242 |
redd | +0.222 |
Neg logit contributions | |
answered | -0.685 |
said | -0.685 |
woke | -0.682 |
and | -0.680 |
gave | -0.673 |
Token | . |
Token ID | 13 |
Feature activation | -6.42e+01 |
Pos logit contributions | |
saw | +0.602 |
rier | +0.560 |
igsaw | +0.553 |
reating | +0.539 |
possible | +0.538 |
Neg logit contributions | |
woke | -0.149 |
answered | -0.129 |
listened | -0.127 |
whispered | -0.126 |
replied | -0.124 |
Token | One |
Token ID | 1881 |
Feature activation | -7.83e+01 |
Pos logit contributions | |
igsaw | +0.255 |
pill | +0.233 |
saw | +0.232 |
ey | +0.229 |
nam | +0.227 |
Neg logit contributions | |
and | -0.272 |
, | -0.257 |
came | -0.220 |
got | -0.215 |
said | -0.214 |
Token | day |
Token ID | 1110 |
Feature activation | -7.25e+01 |
Pos logit contributions | |
igsaw | +0.260 |
saw | +0.243 |
ey | +0.237 |
redd | +0.227 |
nam | +0.222 |
Neg logit contributions | |
and | -0.364 |
, | -0.337 |
said | -0.306 |
came | -0.298 |
gave | -0.295 |
Token | , |
Token ID | 11 |
Feature activation | -5.64e+01 |
Pos logit contributions | |
igsaw | +0.390 |
Wed | +0.379 |
nam | +0.351 |
spec | +0.348 |
ocol | +0.346 |
Neg logit contributions | |
and | -0.121 |
owed | -0.095 |
promised | -0.086 |
watered | -0.085 |
searched | -0.081 |
Token | she |
Token ID | 673 |
Feature activation | -7.20e+01 |
Pos logit contributions | |
nam | +0.227 |
igsaw | +0.222 |
saw | +0.197 |
redd | +0.196 |
ey | +0.195 |
Neg logit contributions | |
and | -0.232 |
, | -0.211 |
said | -0.182 |
came | -0.182 |
. | -0.181 |
Token | started |
Token ID | 2067 |
Feature activation | -5.49e+01 |
Pos logit contributions | |
saw | +0.381 |
ey | +0.376 |
nam | +0.354 |
igsaw | +0.351 |
redd | +0.350 |
Neg logit contributions | |
and | -0.150 |
, | -0.141 |
. | -0.108 |
owed | -0.106 |
answered | -0.098 |
Token | building |
Token ID | 2615 |
Feature activation | -7.58e+01 |
Pos logit contributions | |
igsaw | +0.285 |
ey | +0.274 |
nam | +0.270 |
saw | +0.267 |
pill | +0.256 |
Neg logit contributions | |
and | -0.166 |
, | -0.152 |
said | -0.125 |
. | -0.116 |
came | -0.115 |
Token | a |
Token ID | 257 |
Feature activation | -8.00e+01 |
Pos logit contributions | |
igsaw | +0.305 |
ey | +0.298 |
saw | +0.298 |
nam | +0.289 |
redd | +0.274 |
Neg logit contributions | |
and | -0.270 |
, | -0.259 |
said | -0.252 |
woke | -0.240 |
gave | -0.238 |
Token | big |
Token ID | 1263 |
Feature activation | -1.08e+02 |
Pos logit contributions | |
igsaw | +0.320 |
ey | +0.310 |
pill | +0.296 |
nam | +0.292 |
Deal | +0.285 |
Neg logit contributions | |
and | -0.383 |
, | -0.363 |
. | -0.312 |
came | -0.293 |
said | -0.291 |
Token | tower |
Token ID | 10580 |
Feature activation | -8.11e+01 |
Pos logit contributions | |
igsaw | +0.355 |
ey | +0.327 |
saw | +0.298 |
nam | +0.286 |
redd | +0.285 |
Neg logit contributions | |
and | -0.542 |
, | -0.504 |
said | -0.480 |
came | -0.464 |
. | -0.452 |
Token | Re |
Token ID | 3041 |
Feature activation | -1.13e+02 |
Pos logit contributions | |
igsaw | +0.420 |
ey | +0.405 |
nam | +0.392 |
redd | +0.381 |
iqu | +0.374 |
Neg logit contributions | |
and | -0.334 |
, | -0.308 |
. | -0.253 |
said | -0.250 |
came | -0.245 |
Token | becca |
Token ID | 20627 |
Feature activation | -9.64e+01 |
Pos logit contributions | |
igsaw | +0.278 |
ey | +0.233 |
saw | +0.232 |
rou | +0.224 |
iqu | +0.224 |
Neg logit contributions | |
and | -0.693 |
, | -0.665 |
came | -0.608 |
. | -0.584 |
woke | -0.582 |
Token | put |
Token ID | 1234 |
Feature activation | -4.28e+01 |
Pos logit contributions | |
pill | +0.350 |
saw | +0.346 |
iam | +0.335 |
spec | +0.333 |
rou | +0.324 |
Neg logit contributions | |
because | -0.205 |
eness | -0.166 |
than | -0.163 |
anas | -0.156 |
, | -0.156 |
Token | the |
Token ID | 262 |
Feature activation | -7.35e+01 |
Pos logit contributions | |
nam | +0.189 |
igsaw | +0.183 |
ey | +0.181 |
redd | +0.176 |
ocol | +0.165 |
Neg logit contributions | |
and | -0.144 |
, | -0.137 |
woke | -0.131 |
came | -0.118 |
listened | -0.116 |
Token | l |
Token ID | 300 |
Feature activation | -6.83e+01 |
Pos logit contributions | |
ey | +0.257 |
igsaw | +0.253 |
redd | +0.240 |
nam | +0.236 |
ju | +0.216 |
Neg logit contributions | |
and | -0.348 |
, | -0.333 |
came | -0.306 |
said | -0.300 |
gave | -0.286 |
Token | oll |
Token ID | 692 |
Feature activation | -1.58e+02 |
Pos logit contributions | |
igsaw | +0.205 |
ey | +0.192 |
pill | +0.177 |
saw | +0.171 |
nam | +0.169 |
Neg logit contributions | |
and | -0.378 |
, | -0.360 |
said | -0.318 |
came | -0.315 |
. | -0.313 |
Token | ipop |
Token ID | 42800 |
Feature activation | -8.74e+01 |
Pos logit contributions | |
igsaw | +0.461 |
ocol | +0.446 |
ey | +0.441 |
nam | +0.435 |
pill | +0.408 |
Neg logit contributions | |
and | -0.755 |
, | -0.685 |
came | -0.644 |
said | -0.626 |
. | -0.616 |
Token | down |
Token ID | 866 |
Feature activation | -7.11e+01 |
Pos logit contributions | |
nam | +0.327 |
igsaw | +0.300 |
redd | +0.283 |
ifying | +0.278 |
ju | +0.276 |
Neg logit contributions | |
woke | -0.296 |
answered | -0.290 |
came | -0.290 |
guessed | -0.284 |
told | -0.275 |
Token | and |
Token ID | 290 |
Feature activation | -3.47e+01 |
Pos logit contributions | |
nam | +0.312 |
igsaw | +0.309 |
rier | +0.305 |
rou | +0.304 |
iring | +0.299 |
Neg logit contributions | |
woke | -0.186 |
anas | -0.168 |
rang | -0.148 |
roared | -0.147 |
ate | -0.146 |
Token | tried |
Token ID | 3088 |
Feature activation | -3.12e+01 |
Pos logit contributions | |
nam | +0.179 |
redd | +0.160 |
ju | +0.157 |
saw | +0.153 |
ey | +0.153 |
Neg logit contributions | |
and | -0.081 |
, | -0.079 |
. | -0.066 |
; | -0.063 |
- | -0.063 |
Token | to |
Token ID | 284 |
Feature activation | -5.06e+01 |
Pos logit contributions | |
igsaw | +0.121 |
microsc | +0.116 |
downward | +0.113 |
nam | +0.113 |
rou | +0.113 |
Neg logit contributions | |
woke | -0.104 |
said | -0.097 |
owed | -0.095 |
came | -0.094 |
went | -0.094 |
Token | and |
Token ID | 290 |
Feature activation | -2.73e+01 |
Pos logit contributions | |
rier | +0.287 |
igsaw | +0.285 |
iring | +0.276 |
rou | +0.273 |
arl | +0.266 |
Neg logit contributions | |
talked | -0.155 |
hed | -0.148 |
entered | -0.144 |
counted | -0.143 |
woke | -0.140 |
Token | her |
Token ID | 607 |
Feature activation | -6.83e+01 |
Pos logit contributions | |
nam | +0.120 |
ey | +0.107 |
iqu | +0.101 |
redd | +0.100 |
rou | +0.099 |
Neg logit contributions | |
and | -0.099 |
, | -0.090 |
. | -0.073 |
! | -0.070 |
owed | -0.067 |
Token | face |
Token ID | 1986 |
Feature activation | -4.97e+01 |
Pos logit contributions | |
igsaw | +0.221 |
ey | +0.220 |
nam | +0.196 |
iqu | +0.186 |
rou | +0.184 |
Neg logit contributions | |
and | -0.310 |
, | -0.294 |
came | -0.286 |
got | -0.272 |
gave | -0.266 |
Token | began |
Token ID | 2540 |
Feature activation | -3.44e+01 |
Pos logit contributions | |
iqu | +0.174 |
rou | +0.160 |
spec | +0.149 |
ifying | +0.146 |
triangular | +0.141 |
Neg logit contributions | |
", | -0.142 |
!" | -0.133 |
agree | -0.130 |
irus | -0.127 |
!", | -0.126 |
Token | to |
Token ID | 284 |
Feature activation | -4.13e+01 |
Pos logit contributions | |
ey | +0.141 |
igsaw | +0.140 |
nam | +0.139 |
microsc | +0.133 |
rou | +0.133 |
Neg logit contributions | |
and | -0.103 |
, | -0.094 |
woke | -0.090 |
said | -0.087 |
owed | -0.085 |
Token | scr |
Token ID | 6040 |
Feature activation | -1.62e+02 |
Pos logit contributions | |
nam | +0.115 |
ey | +0.111 |
redd | +0.092 |
igsaw | +0.084 |
ju | +0.082 |
Neg logit contributions | |
ed | -0.196 |
es | -0.185 |
and | -0.174 |
anas | -0.168 |
ar | -0.166 |
Token | unch |
Token ID | 3316 |
Feature activation | -5.12e+01 |
Pos logit contributions | |
nam | +0.710 |
igsaw | +0.689 |
ey | +0.651 |
redd | +0.648 |
spec | +0.648 |
Neg logit contributions | |
and | -0.690 |
, | -0.638 |
. | -0.537 |
said | -0.503 |
! | -0.475 |
Token | up |
Token ID | 510 |
Feature activation | -7.55e+01 |
Pos logit contributions | |
rier | +0.219 |
redd | +0.208 |
igsaw | +0.202 |
saw | +0.199 |
ifying | +0.198 |
Neg logit contributions | |
and | -0.158 |
, | -0.151 |
answered | -0.149 |
told | -0.146 |
checked | -0.146 |
Token | . |
Token ID | 13 |
Feature activation | -2.77e+01 |
Pos logit contributions | |
rier | +0.367 |
redd | +0.349 |
nam | +0.346 |
igsaw | +0.338 |
ju | +0.335 |
Neg logit contributions | |
anas | -0.187 |
owed | -0.179 |
woke | -0.175 |
came | -0.169 |
listened | -0.166 |
Token | Hot |
Token ID | 6964 |
Feature activation | -3.08e+01 |
Pos logit contributions | |
igsaw | +0.089 |
pill | +0.078 |
ey | +0.076 |
Wed | +0.073 |
nam | +0.072 |
Neg logit contributions | |
and | -0.133 |
, | -0.126 |
came | -0.118 |
got | -0.115 |
woke | -0.114 |
Token | tears |
Token ID | 10953 |
Feature activation | -3.81e+01 |
Pos logit contributions | |
pill | +0.117 |
igsaw | +0.109 |
spec | +0.098 |
rou | +0.096 |
ey | +0.093 |
Neg logit contributions | |
and | -0.109 |
told | -0.104 |
promised | -0.103 |
searched | -0.103 |
whispered | -0.101 |
Token | broken |
Token ID | 5445 |
Feature activation | -8.64e+01 |
Pos logit contributions | |
igsaw | +0.299 |
ey | +0.292 |
nam | +0.290 |
redd | +0.280 |
ju | +0.255 |
Neg logit contributions | |
and | -0.414 |
, | -0.407 |
came | -0.379 |
said | -0.365 |
answered | -0.356 |
Token | parts |
Token ID | 3354 |
Feature activation | -7.51e+01 |
Pos logit contributions | |
igsaw | +0.289 |
ey | +0.258 |
nam | +0.240 |
outer | +0.230 |
spec | +0.228 |
Neg logit contributions | |
listened | -0.374 |
owed | -0.353 |
said | -0.352 |
talked | -0.351 |
came | -0.349 |
Token | together |
Token ID | 1978 |
Feature activation | -8.43e+01 |
Pos logit contributions | |
igsaw | +0.357 |
possible | +0.332 |
nam | +0.320 |
ifying | +0.320 |
rier | +0.305 |
Neg logit contributions | |
woke | -0.212 |
listened | -0.190 |
talked | -0.188 |
came | -0.187 |
checked | -0.184 |
Token | . |
Token ID | 13 |
Feature activation | -5.78e+01 |
Pos logit contributions | |
igsaw | +0.349 |
possible | +0.348 |
nam | +0.332 |
outer | +0.324 |
ifying | +0.315 |
Neg logit contributions | |
woke | -0.220 |
rang | -0.216 |
replied | -0.211 |
listened | -0.205 |
cried | -0.205 |
Token | Per |
Token ID | 2448 |
Feature activation | -1.06e+02 |
Pos logit contributions | |
igsaw | +0.242 |
pill | +0.218 |
saw | +0.209 |
nam | +0.204 |
ey | +0.204 |
Neg logit contributions | |
and | -0.244 |
, | -0.232 |
came | -0.201 |
got | -0.194 |
. | -0.189 |
Token | c |
Token ID | 66 |
Feature activation | -1.68e+02 |
Pos logit contributions | |
igsaw | +0.358 |
pill | +0.337 |
gol | +0.337 |
rou | +0.328 |
itable | +0.302 |
Neg logit contributions | |
and | -0.525 |
, | -0.498 |
. | -0.436 |
fell | -0.432 |
came | -0.407 |
Token | ival |
Token ID | 2473 |
Feature activation | -8.26e+01 |
Pos logit contributions | |
rou | +0.753 |
pill | +0.720 |
ini | +0.712 |
ric | +0.694 |
igsaw | +0.680 |
Neg logit contributions | |
and | -0.482 |
, | -0.450 |
whispered | -0.390 |
read | -0.386 |
gave | -0.384 |
Token | said |
Token ID | 531 |
Feature activation | -4.11e+01 |
Pos logit contributions | |
spe | +0.338 |
gol | +0.316 |
spec | +0.308 |
igsaw | +0.290 |
concession | +0.290 |
Neg logit contributions | |
int | -0.211 |
owed | -0.181 |
and | -0.176 |
woke | -0.172 |
use | -0.171 |
Token | : |
Token ID | 25 |
Feature activation | -3.80e+01 |
Pos logit contributions | |
rier | +0.187 |
igsaw | +0.180 |
nam | +0.179 |
gol | +0.175 |
Ag | +0.169 |
Neg logit contributions | |
and | -0.106 |
checked | -0.097 |
fell | -0.095 |
squeezed | -0.091 |
whispered | -0.090 |
Token | " |
Token ID | 366 |
Feature activation | -5.18e+01 |
Pos logit contributions | |
igsaw | +0.133 |
Amen | +0.119 |
Wed | +0.118 |
gol | +0.114 |
nam | +0.112 |
Neg logit contributions | |
and | -0.155 |
owed | -0.151 |
fell | -0.148 |
hid | -0.146 |
got | -0.144 |
Token | Now |
Token ID | 3844 |
Feature activation | -3.31e+01 |
Pos logit contributions | |
igsaw | +0.136 |
pill | +0.123 |
nam | +0.121 |
saw | +0.120 |
redd | +0.118 |
Neg logit contributions | |
and | -0.301 |
, | -0.301 |
. | -0.256 |
came | -0.250 |
said | -0.249 |
Token | Every |
Token ID | 3887 |
Feature activation | -6.55e+01 |
Pos logit contributions | |
igsaw | +0.224 |
ey | +0.199 |
pill | +0.196 |
saw | +0.194 |
nam | +0.193 |
Neg logit contributions | |
and | -0.234 |
, | -0.222 |
came | -0.189 |
. | -0.185 |
said | -0.184 |
Token | night |
Token ID | 1755 |
Feature activation | -6.60e+01 |
Pos logit contributions | |
igsaw | +0.216 |
ey | +0.171 |
outer | +0.169 |
Wed | +0.167 |
Amen | +0.167 |
Neg logit contributions | |
and | -0.317 |
, | -0.289 |
said | -0.273 |
owed | -0.267 |
gave | -0.263 |
Token | he |
Token ID | 339 |
Feature activation | -6.79e+01 |
Pos logit contributions | |
spec | +0.293 |
Wed | +0.291 |
igsaw | +0.279 |
outer | +0.267 |
rier | +0.259 |
Neg logit contributions | |
and | -0.146 |
owed | -0.139 |
argued | -0.135 |
agreed | -0.134 |
whispered | -0.128 |
Token | would |
Token ID | 561 |
Feature activation | -6.16e+01 |
Pos logit contributions | |
rou | +0.213 |
iam | +0.205 |
outer | +0.197 |
spe | +0.194 |
spir | +0.190 |
Neg logit contributions | |
, | -0.218 |
and | -0.197 |
; | -0.187 |
red | -0.184 |
! | -0.179 |
Token | bat |
Token ID | 7365 |
Feature activation | -6.05e+01 |
Pos logit contributions | |
igsaw | +0.208 |
outer | +0.197 |
Amen | +0.193 |
ey | +0.189 |
Wed | +0.176 |
Neg logit contributions | |
ed | -0.234 |
and | -0.215 |
red | -0.204 |
said | -0.201 |
, | -0.196 |
Token | he |
Token ID | 258 |
Feature activation | -1.50e+02 |
Pos logit contributions | |
igsaw | +0.069 |
concession | +0.031 |
reating | +0.029 |
Wed | +0.025 |
gol | +0.012 |
Neg logit contributions | |
answered | -0.382 |
told | -0.374 |
touched | -0.372 |
whispered | -0.371 |
said | -0.364 |
Token | in |
Token ID | 287 |
Feature activation | -6.29e+01 |
Pos logit contributions | |
igsaw | +0.740 |
Wed | +0.688 |
nam | +0.675 |
rier | +0.651 |
math | +0.648 |
Neg logit contributions | |
checked | -0.334 |
answered | -0.310 |
came | -0.303 |
said | -0.298 |
touched | -0.289 |
Token | the |
Token ID | 262 |
Feature activation | -6.49e+01 |
Pos logit contributions | |
igsaw | +0.234 |
outer | +0.214 |
nam | +0.207 |
ey | +0.206 |
Wed | +0.195 |
Neg logit contributions | |
owed | -0.256 |
said | -0.242 |
and | -0.241 |
came | -0.241 |
listened | -0.237 |
Token | tub |
Token ID | 12202 |
Feature activation | -7.35e+01 |
Pos logit contributions | |
igsaw | +0.224 |
ey | +0.209 |
nam | +0.194 |
saw | +0.187 |
redd | +0.186 |
Neg logit contributions | |
and | -0.332 |
, | -0.314 |
said | -0.275 |
came | -0.272 |
. | -0.269 |
Token | with |
Token ID | 351 |
Feature activation | -4.26e+01 |
Pos logit contributions | |
igsaw | +0.410 |
rier | +0.408 |
rou | +0.390 |
dining | +0.381 |
umm | +0.374 |
Neg logit contributions | |
woke | -0.085 |
listened | -0.079 |
whispered | -0.079 |
squeezed | -0.078 |
rang | -0.078 |
Token | special |
Token ID | 2041 |
Feature activation | -6.78e+01 |
Pos logit contributions | |
igsaw | +0.140 |
nam | +0.140 |
outer | +0.129 |
ey | +0.126 |
microsc | +0.125 |
Neg logit contributions | |
and | -0.181 |
, | -0.170 |
owed | -0.158 |
said | -0.156 |
answered | -0.153 |
Token | turned |
Token ID | 2900 |
Feature activation | -3.65e+01 |
Pos logit contributions | |
Amen | +0.333 |
spec | +0.319 |
pill | +0.317 |
redd | +0.317 |
Looks | +0.317 |
Neg logit contributions | |
, | -0.166 |
and | -0.165 |
. | -0.106 |
owed | -0.100 |
! | -0.098 |
Token | out |
Token ID | 503 |
Feature activation | -8.11e+01 |
Pos logit contributions | |
ifying | +0.128 |
rier | +0.124 |
ju | +0.120 |
redd | +0.118 |
igsaw | +0.117 |
Neg logit contributions | |
agree | -0.135 |
owed | -0.131 |
anas | -0.129 |
counted | -0.127 |
read | -0.126 |
Token | that |
Token ID | 326 |
Feature activation | -6.38e+01 |
Pos logit contributions | |
igsaw | +0.345 |
rier | +0.339 |
outer | +0.338 |
ifying | +0.328 |
rou | +0.328 |
Neg logit contributions | |
ops | -0.213 |
anas | -0.202 |
talked | -0.190 |
hed | -0.190 |
owed | -0.186 |
Token | the |
Token ID | 262 |
Feature activation | -7.35e+01 |
Pos logit contributions | |
nam | +0.223 |
redd | +0.201 |
ju | +0.201 |
outer | +0.197 |
possible | +0.196 |
Neg logit contributions | |
and | -0.249 |
owed | -0.228 |
, | -0.218 |
red | -0.216 |
anas | -0.215 |
Token | av |
Token ID | 1196 |
Feature activation | -2.05e+02 |
Pos logit contributions | |
igsaw | +0.263 |
ey | +0.255 |
nam | +0.249 |
redd | +0.243 |
saw | +0.242 |
Neg logit contributions | |
and | -0.333 |
, | -0.319 |
came | -0.275 |
said | -0.271 |
. | -0.267 |
Token | oc |
Token ID | 420 |
Feature activation | -1.82e+02 |
Pos logit contributions | |
igsaw | +1.034 |
ey | +0.986 |
nam | +0.984 |
saw | +0.940 |
iqu | +0.938 |
Neg logit contributions | |
and | -0.779 |
, | -0.717 |
. | -0.559 |
said | -0.554 |
came | -0.553 |
Token | ados |
Token ID | 22484 |
Feature activation | -8.30e+01 |
Pos logit contributions | |
igsaw | +0.588 |
ey | +0.545 |
nam | +0.505 |
pill | +0.491 |
saw | +0.489 |
Neg logit contributions | |
and | -0.969 |
, | -0.914 |
said | -0.811 |
came | -0.805 |
. | -0.804 |
Token | were |
Token ID | 547 |
Feature activation | -5.99e+01 |
Pos logit contributions | |
pill | +0.229 |
ju | +0.225 |
igator | +0.221 |
glers | +0.220 |
ey | +0.218 |
Neg logit contributions | |
anas | -0.234 |
red | -0.231 |
irus | -0.211 |
aron | -0.205 |
and | -0.200 |
Token | bad |
Token ID | 2089 |
Feature activation | -7.07e+01 |
Pos logit contributions | |
nam | +0.171 |
rou | +0.152 |
ju | +0.151 |
redd | +0.147 |
possible | +0.145 |
Neg logit contributions | |
owed | -0.253 |
and | -0.245 |
woke | -0.243 |
, | -0.237 |
replied | -0.233 |
Token | and |
Token ID | 290 |
Feature activation | -3.33e+01 |
Pos logit contributions | |
pill | +0.319 |
spec | +0.318 |
igsaw | +0.312 |
umm | +0.311 |
rou | +0.303 |
Neg logit contributions | |
touched | -0.137 |
answered | -0.137 |
whispered | -0.136 |
held | -0.136 |
reached | -0.134 |
Token | made |
Token ID | 925 |
Feature activation | -3.62e+01 |
Pos logit contributions | |
nam | +0.151 |
redd | +0.132 |
ey | +0.129 |
igsaw | +0.126 |
ju | +0.125 |
Neg logit contributions | |
and | -0.122 |
, | -0.112 |
. | -0.093 |
owed | -0.089 |
woke | -0.085 |