Token | Once |
Token ID | 7454 |
Feature activation | +2.27e-01 |
Pos logit contributions | |
ride | +0.218 |
shortcut | +0.217 |
grade | +0.217 |
ork | +0.215 |
dictionary | +0.213 |
Neg logit contributions | |
i | -0.283 |
aming | -0.270 |
gigg | -0.255 |
ink | -0.245 |
us | -0.242 |
Token | upon |
Token ID | 2402 |
Feature activation | +8.04e+00 |
Pos logit contributions | |
Circ | +0.006 |
growth | +0.006 |
Across | +0.006 |
This | +0.006 |
ride | +0.006 |
Neg logit contributions | |
i | -0.006 |
impressed | -0.006 |
glad | -0.006 |
anut | -0.005 |
pleased | -0.005 |
Token | a |
Token ID | 257 |
Feature activation | +4.07e+00 |
Pos logit contributions | |
There | +0.185 |
Circ | +0.181 |
This | +0.180 |
Across | +0.174 |
To | +0.174 |
Neg logit contributions | |
anut | -0.228 |
impressed | -0.224 |
i | -0.220 |
careful | -0.219 |
glad | -0.218 |
Token | time |
Token ID | 640 |
Feature activation | +6.43e+00 |
Pos logit contributions | |
There | +0.042 |
This | +0.040 |
ride | +0.036 |
Circ | +0.036 |
On | +0.036 |
Neg logit contributions | |
anut | -0.174 |
i | -0.170 |
bled | -0.169 |
impressed | -0.168 |
bly | -0.167 |
Token | , |
Token ID | 11 |
Feature activation | -5.40e+00 |
Pos logit contributions | |
There | +0.185 |
This | +0.183 |
Circ | +0.175 |
Where | +0.174 |
To | +0.174 |
Neg logit contributions | |
anut | -0.159 |
i | -0.151 |
impressed | -0.148 |
bly | -0.144 |
bled | -0.143 |
Token | there |
Token ID | 612 |
Feature activation | +1.67e+01 |
Pos logit contributions | |
anut | +0.198 |
i | +0.191 |
impressed | +0.188 |
bled | +0.183 |
years | +0.183 |
Neg logit contributions | |
There | -0.094 |
This | -0.091 |
Where | -0.084 |
Across | -0.084 |
Circ | -0.083 |
Token | was |
Token ID | 373 |
Feature activation | -2.60e-01 |
Pos logit contributions | |
There | +0.419 |
This | +0.410 |
Circ | +0.408 |
To | +0.394 |
Across | +0.394 |
Neg logit contributions | |
anut | -0.453 |
i | -0.446 |
impressed | -0.436 |
bled | -0.422 |
bly | -0.418 |
Token | a |
Token ID | 257 |
Feature activation | +3.90e+00 |
Pos logit contributions | |
anut | +0.010 |
i | +0.010 |
impressed | +0.010 |
ising | +0.010 |
ushed | +0.010 |
Neg logit contributions | |
There | -0.004 |
This | -0.004 |
Circ | -0.004 |
On | -0.004 |
Under | -0.003 |
Token | graceful |
Token ID | 44363 |
Feature activation | -4.67e+00 |
Pos logit contributions | |
ride | +0.083 |
university | +0.081 |
There | +0.078 |
visit | +0.077 |
Circ | +0.075 |
Neg logit contributions | |
bled | -0.127 |
i | -0.124 |
anut | -0.123 |
years | -0.121 |
bly | -0.120 |
Token | hero |
Token ID | 4293 |
Feature activation | +6.64e-01 |
Pos logit contributions | |
anut | +0.149 |
i | +0.145 |
impressed | +0.142 |
bly | +0.138 |
bled | +0.138 |
Neg logit contributions | |
There | -0.098 |
This | -0.096 |
Circ | -0.093 |
Where | -0.091 |
To | -0.091 |
Token | named |
Token ID | 3706 |
Feature activation | -1.13e+00 |
Pos logit contributions | |
There | +0.017 |
Circ | +0.017 |
Across | +0.016 |
This | +0.016 |
Where | +0.016 |
Neg logit contributions | |
i | -0.018 |
anut | -0.018 |
impressed | -0.018 |
glad | -0.017 |
careful | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | +6.88e+00 |
Pos logit contributions | |
There | +0.063 |
This | +0.061 |
Circ | +0.060 |
Where | +0.058 |
To | +0.058 |
Neg logit contributions | |
anut | -0.073 |
i | -0.070 |
impressed | -0.069 |
bled | -0.067 |
bly | -0.067 |
Token | â |
Token ID | 22940 |
Feature activation | -9.69e+00 |
Pos logit contributions | |
Circ | +0.132 |
ork | +0.129 |
hike | +0.128 |
ride | +0.122 |
growth | +0.120 |
Neg logit contributions | |
glad | -0.217 |
i | -0.213 |
impressed | -0.212 |
anut | -0.212 |
careful | -0.207 |
Token | € |
Token ID | 26391 |
Feature activation | -7.74e+00 |
Pos logit contributions | |
anut | +0.268 |
i | +0.260 |
impressed | +0.253 |
bly | +0.246 |
bled | +0.246 |
Neg logit contributions | |
There | -0.245 |
This | -0.237 |
Circ | -0.237 |
Where | -0.230 |
Across | -0.229 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.04e+01 |
Pos logit contributions | |
glad | +0.238 |
impressed | +0.235 |
anut | +0.233 |
careful | +0.230 |
i | +0.230 |
Neg logit contributions | |
Circ | -0.163 |
growth | -0.154 |
Across | -0.147 |
corner | -0.142 |
There | -0.140 |
Token | Once |
Token ID | 7454 |
Feature activation | +1.09e-02 |
Pos logit contributions | |
dictionary | +0.225 |
Circ | +0.223 |
grade | +0.222 |
university | +0.222 |
ride | +0.219 |
Neg logit contributions | |
i | -0.268 |
aming | -0.258 |
gigg | -0.253 |
m | -0.241 |
glad | -0.237 |
Token | there |
Token ID | 612 |
Feature activation | +1.65e+01 |
Pos logit contributions | |
Circ | +0.000 |
growth | +0.000 |
This | +0.000 |
There | +0.000 |
Across | +0.000 |
Neg logit contributions | |
i | -0.000 |
impressed | -0.000 |
anut | -0.000 |
glad | -0.000 |
pleased | -0.000 |
Token | was |
Token ID | 373 |
Feature activation | -2.98e-01 |
Pos logit contributions | |
There | +0.367 |
This | +0.361 |
Where | +0.340 |
ride | +0.338 |
To | +0.337 |
Neg logit contributions | |
anut | -0.521 |
i | -0.493 |
impressed | -0.488 |
bly | -0.484 |
bled | -0.480 |
Token | a |
Token ID | 257 |
Feature activation | +3.69e+00 |
Pos logit contributions | |
anut | +0.013 |
i | +0.012 |
ushed | +0.012 |
maid | +0.012 |
ising | +0.012 |
Neg logit contributions | |
There | -0.003 |
This | -0.003 |
On | -0.003 |
to | -0.003 |
Under | -0.003 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.49e+00 |
Pos logit contributions | |
ride | +0.083 |
There | +0.081 |
Circ | +0.079 |
This | +0.078 |
university | +0.078 |
Neg logit contributions | |
bled | -0.112 |
anut | -0.111 |
i | -0.109 |
years | -0.107 |
bly | -0.107 |
Token | named |
Token ID | 3706 |
Feature activation | -1.21e+00 |
Pos logit contributions | |
anut | +0.033 |
impressed | +0.031 |
bled | +0.031 |
i | +0.031 |
bly | +0.030 |
Neg logit contributions | |
There | -0.046 |
This | -0.045 |
ride | -0.044 |
To | -0.043 |
Where | -0.043 |
Token | Jane |
Token ID | 12091 |
Feature activation | -5.92e+00 |
Pos logit contributions | |
anut | +0.034 |
i | +0.033 |
impressed | +0.031 |
years | +0.031 |
careful | +0.031 |
Neg logit contributions | |
Circ | -0.030 |
There | -0.029 |
ride | -0.029 |
Across | -0.029 |
This | -0.028 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.00e+01 |
Pos logit contributions | |
irling | +0.219 |
impressed | +0.218 |
anut | +0.218 |
i | +0.218 |
glad | +0.215 |
Neg logit contributions | |
Circ | -0.174 |
growth | -0.166 |
Across | -0.163 |
There | -0.162 |
Where | -0.156 |
Token | Once |
Token ID | 7454 |
Feature activation | -2.42e-01 |
Pos logit contributions | |
university | +0.209 |
dictionary | +0.206 |
ride | +0.204 |
grade | +0.203 |
shortcut | +0.200 |
Neg logit contributions | |
i | -0.276 |
aming | -0.268 |
gigg | -0.259 |
bly | -0.248 |
m | -0.246 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.78e+00 |
Pos logit contributions | |
i | +0.006 |
impressed | +0.006 |
anut | +0.006 |
glad | +0.006 |
pleased | +0.006 |
Neg logit contributions | |
Circ | -0.007 |
growth | -0.006 |
This | -0.006 |
There | -0.006 |
ride | -0.006 |
Token | a |
Token ID | 257 |
Feature activation | +3.84e+00 |
Pos logit contributions | |
There | +0.181 |
Circ | +0.179 |
This | +0.178 |
Across | +0.172 |
To | +0.171 |
Neg logit contributions | |
anut | -0.216 |
impressed | -0.214 |
careful | -0.209 |
i | -0.208 |
glad | -0.208 |
Token | time |
Token ID | 640 |
Feature activation | +6.12e+00 |
Pos logit contributions | |
There | +0.041 |
This | +0.040 |
Circ | +0.036 |
ride | +0.036 |
On | +0.035 |
Neg logit contributions | |
anut | -0.163 |
i | -0.159 |
bled | -0.157 |
impressed | -0.157 |
bly | -0.155 |
Token | there |
Token ID | 612 |
Feature activation | +1.65e+01 |
Pos logit contributions | |
There | +0.174 |
This | +0.171 |
Where | +0.162 |
To | +0.162 |
Circ | +0.161 |
Neg logit contributions | |
anut | -0.157 |
i | -0.148 |
impressed | -0.145 |
bly | -0.142 |
bled | -0.141 |
Token | was |
Token ID | 373 |
Feature activation | -3.17e-01 |
Pos logit contributions | |
There | +0.410 |
This | +0.403 |
Circ | +0.397 |
To | +0.385 |
Across | +0.385 |
Neg logit contributions | |
anut | -0.457 |
i | -0.448 |
impressed | -0.438 |
bled | -0.423 |
bly | -0.421 |
Token | a |
Token ID | 257 |
Feature activation | +3.78e+00 |
Pos logit contributions | |
anut | +0.014 |
i | +0.013 |
ushed | +0.013 |
maid | +0.013 |
ising | +0.013 |
Neg logit contributions | |
There | -0.004 |
This | -0.004 |
On | -0.003 |
Under | -0.003 |
ride | -0.003 |
Token | restless |
Token ID | 42537 |
Feature activation | -4.59e+00 |
Pos logit contributions | |
ride | +0.082 |
university | +0.077 |
There | +0.076 |
visit | +0.076 |
Circ | +0.073 |
Neg logit contributions | |
bled | -0.123 |
anut | -0.119 |
i | -0.118 |
years | -0.117 |
bly | -0.116 |
Token | little |
Token ID | 1310 |
Feature activation | -3.42e+00 |
Pos logit contributions | |
anut | +0.136 |
i | +0.135 |
impressed | +0.129 |
careful | +0.124 |
glad | +0.124 |
Neg logit contributions | |
There | -0.105 |
This | -0.103 |
Circ | -0.101 |
Where | -0.099 |
To | -0.099 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.44e+00 |
Pos logit contributions | |
anut | +0.115 |
bled | +0.112 |
impressed | +0.109 |
irling | +0.108 |
years | +0.107 |
Neg logit contributions | |
ride | -0.069 |
There | -0.069 |
This | -0.067 |
Circ | -0.066 |
university | -0.065 |
Token | Once |
Token ID | 7454 |
Feature activation | -2.13e-01 |
Pos logit contributions | |
dictionary | +0.217 |
grade | +0.214 |
ride | +0.213 |
Circ | +0.210 |
university | +0.207 |
Neg logit contributions | |
i | -0.253 |
aming | -0.251 |
gigg | -0.245 |
us | -0.235 |
ll | -0.235 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.77e+00 |
Pos logit contributions | |
i | +0.005 |
impressed | +0.005 |
glad | +0.005 |
anut | +0.005 |
pleased | +0.005 |
Neg logit contributions | |
Circ | -0.006 |
growth | -0.006 |
This | -0.006 |
Across | -0.005 |
There | -0.005 |
Token | a |
Token ID | 257 |
Feature activation | +3.79e+00 |
Pos logit contributions | |
There | +0.179 |
This | +0.175 |
Circ | +0.174 |
Across | +0.169 |
To | +0.169 |
Neg logit contributions | |
anut | -0.221 |
impressed | -0.216 |
careful | -0.212 |
i | -0.211 |
glad | -0.210 |
Token | time |
Token ID | 640 |
Feature activation | +6.14e+00 |
Pos logit contributions | |
There | +0.037 |
This | +0.035 |
ride | +0.032 |
On | +0.032 |
Circ | +0.031 |
Neg logit contributions | |
anut | -0.164 |
i | -0.161 |
bled | -0.161 |
impressed | -0.158 |
bly | -0.158 |
Token | , |
Token ID | 11 |
Feature activation | -5.59e+00 |
Pos logit contributions | |
There | +0.169 |
This | +0.167 |
Where | +0.156 |
To | +0.156 |
On | +0.155 |
Neg logit contributions | |
anut | -0.166 |
i | -0.156 |
impressed | -0.152 |
bly | -0.151 |
bled | -0.150 |
Token | there |
Token ID | 612 |
Feature activation | +1.65e+01 |
Pos logit contributions | |
anut | +0.204 |
i | +0.198 |
impressed | +0.194 |
years | +0.190 |
bled | +0.189 |
Neg logit contributions | |
There | -0.097 |
This | -0.093 |
Where | -0.087 |
Across | -0.086 |
Circ | -0.086 |
Token | was |
Token ID | 373 |
Feature activation | -2.68e-01 |
Pos logit contributions | |
There | +0.430 |
Circ | +0.421 |
This | +0.421 |
Across | +0.407 |
To | +0.406 |
Neg logit contributions | |
anut | -0.425 |
i | -0.422 |
impressed | -0.412 |
bled | -0.397 |
glad | -0.391 |
Token | a |
Token ID | 257 |
Feature activation | +3.70e+00 |
Pos logit contributions | |
anut | +0.011 |
i | +0.011 |
ising | +0.010 |
ushed | +0.010 |
impressed | +0.010 |
Neg logit contributions | |
There | -0.004 |
This | -0.004 |
On | -0.003 |
Circ | -0.003 |
Under | -0.003 |
Token | powerful |
Token ID | 3665 |
Feature activation | -2.58e+00 |
Pos logit contributions | |
ride | +0.079 |
university | +0.075 |
There | +0.073 |
visit | +0.072 |
Circ | +0.070 |
Neg logit contributions | |
bled | -0.122 |
i | -0.119 |
anut | -0.118 |
years | -0.116 |
bly | -0.116 |
Token | lion |
Token ID | 18744 |
Feature activation | +4.97e+00 |
Pos logit contributions | |
anut | +0.083 |
i | +0.081 |
impressed | +0.079 |
bly | +0.077 |
bled | +0.077 |
Neg logit contributions | |
There | -0.054 |
This | -0.053 |
Circ | -0.051 |
To | -0.050 |
Where | -0.050 |
Token | named |
Token ID | 3706 |
Feature activation | -1.30e+00 |
Pos logit contributions | |
There | +0.139 |
Circ | +0.138 |
Across | +0.134 |
This | +0.130 |
Where | +0.129 |
Neg logit contributions | |
i | -0.126 |
anut | -0.123 |
impressed | -0.115 |
gigg | -0.112 |
bly | -0.111 |
Token | â |
Token ID | 22940 |
Feature activation | -9.62e+00 |
Pos logit contributions | |
There | +0.140 |
Circ | +0.136 |
This | +0.134 |
Across | +0.131 |
university | +0.131 |
Neg logit contributions | |
i | -0.164 |
anut | -0.163 |
impressed | -0.156 |
careful | -0.153 |
bly | -0.153 |
Token | € |
Token ID | 26391 |
Feature activation | -7.63e+00 |
Pos logit contributions | |
anut | +0.262 |
i | +0.255 |
impressed | +0.242 |
bly | +0.241 |
careful | +0.237 |
Neg logit contributions | |
There | -0.247 |
Circ | -0.239 |
This | -0.232 |
Across | -0.232 |
Where | -0.230 |
Token | ™ |
Token ID | 8151 |
Feature activation | +4.91e+00 |
Pos logit contributions | |
glad | +0.217 |
i | +0.217 |
careful | +0.217 |
anut | +0.215 |
impressed | +0.215 |
Neg logit contributions | |
growth | -0.161 |
Circ | -0.158 |
corner | -0.152 |
Across | -0.151 |
stairs | -0.149 |
Token | s |
Token ID | 82 |
Feature activation | -2.50e+00 |
Pos logit contributions | |
There | +0.123 |
Circ | +0.120 |
This | +0.119 |
Across | +0.116 |
Where | +0.115 |
Neg logit contributions | |
anut | -0.136 |
i | -0.132 |
impressed | -0.129 |
bled | -0.125 |
bly | -0.123 |
Token | go |
Token ID | 467 |
Feature activation | +5.13e+00 |
Pos logit contributions | |
i | +0.084 |
anut | +0.082 |
careful | +0.079 |
impressed | +0.077 |
bly | +0.077 |
Neg logit contributions | |
Circ | -0.050 |
There | -0.046 |
This | -0.045 |
Across | -0.044 |
Where | -0.043 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
There | +0.116 |
Circ | +0.115 |
Across | +0.112 |
This | +0.110 |
growth | +0.109 |
Neg logit contributions | |
anut | -0.154 |
i | -0.154 |
careful | -0.146 |
impressed | -0.143 |
glad | -0.142 |
Token | ! |
Token ID | 0 |
Feature activation | +6.09e+00 |
Pos logit contributions | |
There | +0.428 |
This | +0.412 |
Circ | +0.412 |
Across | +0.402 |
Where | +0.397 |
Neg logit contributions | |
anut | -0.444 |
i | -0.436 |
impressed | -0.422 |
careful | -0.408 |
bled | -0.404 |
Token | â |
Token ID | 22940 |
Feature activation | -9.56e+00 |
Pos logit contributions | |
ork | +0.134 |
Circ | +0.128 |
growth | +0.126 |
hike | +0.126 |
university | +0.125 |
Neg logit contributions | |
glad | -0.171 |
anut | -0.168 |
careful | -0.168 |
impressed | -0.167 |
i | -0.166 |
Token | € |
Token ID | 26391 |
Feature activation | -7.51e+00 |
Pos logit contributions | |
anut | +0.255 |
i | +0.248 |
impressed | +0.236 |
bly | +0.234 |
bled | +0.230 |
Neg logit contributions | |
There | -0.250 |
Circ | -0.245 |
Where | -0.236 |
This | -0.236 |
Across | -0.235 |
Token | They |
Token ID | 1119 |
Feature activation | +2.35e-03 |
Pos logit contributions | |
anut | +0.211 |
glad | +0.211 |
careful | +0.210 |
i | +0.210 |
impressed | +0.209 |
Neg logit contributions | |
Circ | -0.167 |
growth | -0.162 |
Across | -0.157 |
corner | -0.151 |
There | -0.148 |
Token | ran |
Token ID | 4966 |
Feature activation | +1.71e+00 |
Pos logit contributions | |
There | +0.000 |
This | +0.000 |
ride | +0.000 |
Circ | +0.000 |
To | +0.000 |
Neg logit contributions | |
anut | -0.000 |
impressed | -0.000 |
i | -0.000 |
bled | -0.000 |
bly | -0.000 |
Token | â |
Token ID | 22940 |
Feature activation | -9.72e+00 |
Pos logit contributions | |
There | +0.136 |
Circ | +0.131 |
This | +0.129 |
Across | +0.127 |
Where | +0.126 |
Neg logit contributions | |
i | -0.166 |
anut | -0.164 |
impressed | -0.156 |
careful | -0.154 |
bly | -0.153 |
Token | € |
Token ID | 26391 |
Feature activation | -7.88e+00 |
Pos logit contributions | |
anut | +0.263 |
i | +0.254 |
impressed | +0.243 |
bly | +0.241 |
bled | +0.238 |
Neg logit contributions | |
There | -0.252 |
Circ | -0.244 |
This | -0.238 |
Across | -0.237 |
Where | -0.236 |
Token | ™ |
Token ID | 8151 |
Feature activation | +4.84e+00 |
Pos logit contributions | |
impressed | +0.237 |
glad | +0.236 |
i | +0.232 |
anut | +0.232 |
careful | +0.230 |
Neg logit contributions | |
growth | -0.160 |
Circ | -0.159 |
corner | -0.150 |
Across | -0.150 |
stairs | -0.146 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +9.99e+00 |
Pos logit contributions | |
There | +0.125 |
This | +0.122 |
Circ | +0.121 |
Across | +0.118 |
Where | +0.117 |
Neg logit contributions | |
anut | -0.131 |
i | -0.127 |
impressed | -0.124 |
bled | -0.120 |
bly | -0.119 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.01e-01 |
Pos logit contributions | |
shortcut | +0.214 |
grade | +0.213 |
dictionary | +0.209 |
ride | +0.209 |
university | +0.207 |
Neg logit contributions | |
i | -0.258 |
aming | -0.254 |
gigg | -0.240 |
m | -0.227 |
efully | -0.227 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
i | +0.008 |
impressed | +0.008 |
anut | +0.008 |
glad | +0.008 |
pleased | +0.007 |
Neg logit contributions | |
Circ | -0.008 |
growth | -0.008 |
This | -0.007 |
There | -0.007 |
Across | -0.007 |
Token | was |
Token ID | 373 |
Feature activation | -3.62e-01 |
Pos logit contributions | |
There | +0.331 |
This | +0.329 |
Where | +0.306 |
ride | +0.302 |
To | +0.299 |
Neg logit contributions | |
anut | -0.561 |
bly | -0.528 |
years | -0.526 |
impressed | -0.519 |
i | -0.518 |
Token | a |
Token ID | 257 |
Feature activation | +3.73e+00 |
Pos logit contributions | |
anut | +0.015 |
bled | +0.015 |
ushed | +0.014 |
ising | +0.014 |
i | +0.014 |
Neg logit contributions | |
There | -0.005 |
This | -0.005 |
On | -0.004 |
Under | -0.004 |
| -0.004 |
Token | boy |
Token ID | 2933 |
Feature activation | -7.29e-01 |
Pos logit contributions | |
ride | +0.084 |
There | +0.083 |
Circ | +0.081 |
This | +0.080 |
university | +0.079 |
Neg logit contributions | |
bled | -0.112 |
anut | -0.112 |
i | -0.109 |
bly | -0.108 |
years | -0.107 |
Token | who |
Token ID | 508 |
Feature activation | +3.03e+00 |
Pos logit contributions | |
bled | +0.019 |
anut | +0.019 |
impressed | +0.018 |
bly | +0.018 |
pleased | +0.017 |
Neg logit contributions | |
This | -0.020 |
to | -0.019 |
There | -0.019 |
ride | -0.019 |
To | -0.019 |
Token | liked |
Token ID | 8288 |
Feature activation | -6.95e-01 |
Pos logit contributions | |
There | +0.098 |
This | +0.097 |
Circ | +0.094 |
ride | +0.093 |
Where | +0.093 |
Neg logit contributions | |
anut | -0.064 |
i | -0.061 |
impressed | -0.060 |
bled | -0.058 |
bly | -0.057 |
Token | Once |
Token ID | 7454 |
Feature activation | -2.42e-01 |
Pos logit contributions | |
dictionary | +0.218 |
grade | +0.214 |
hike | +0.212 |
university | +0.210 |
ride | +0.206 |
Neg logit contributions | |
aming | -0.253 |
i | -0.244 |
glad | -0.242 |
gigg | -0.237 |
bled | -0.233 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.55e+00 |
Pos logit contributions | |
anut | +0.007 |
impressed | +0.007 |
i | +0.007 |
glad | +0.006 |
pleased | +0.006 |
Neg logit contributions | |
Circ | -0.006 |
This | -0.006 |
There | -0.006 |
growth | -0.006 |
Across | -0.006 |
Token | a |
Token ID | 257 |
Feature activation | +3.49e+00 |
Pos logit contributions | |
There | +0.177 |
This | +0.173 |
Circ | +0.170 |
Across | +0.166 |
To | +0.166 |
Neg logit contributions | |
anut | -0.213 |
impressed | -0.209 |
careful | -0.205 |
glad | -0.203 |
i | -0.202 |
Token | time |
Token ID | 640 |
Feature activation | +5.92e+00 |
Pos logit contributions | |
There | +0.032 |
This | +0.031 |
ride | +0.029 |
On | +0.028 |
Circ | +0.027 |
Neg logit contributions | |
anut | -0.152 |
i | -0.150 |
bled | -0.150 |
bly | -0.148 |
impressed | -0.148 |
Token | , |
Token ID | 11 |
Feature activation | -5.88e+00 |
Pos logit contributions | |
There | +0.166 |
This | +0.164 |
Where | +0.154 |
To | +0.153 |
Circ | +0.153 |
Neg logit contributions | |
anut | -0.156 |
i | -0.147 |
impressed | -0.143 |
bly | -0.142 |
bled | -0.141 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
anut | +0.218 |
i | +0.212 |
impressed | +0.207 |
years | +0.203 |
irling | +0.202 |
Neg logit contributions | |
There | -0.100 |
This | -0.096 |
Across | -0.089 |
Where | -0.089 |
On | -0.088 |
Token | was |
Token ID | 373 |
Feature activation | -5.33e-01 |
Pos logit contributions | |
There | +0.409 |
This | +0.400 |
Circ | +0.395 |
Across | +0.384 |
To | +0.383 |
Neg logit contributions | |
anut | -0.452 |
i | -0.442 |
impressed | -0.433 |
bled | -0.420 |
bly | -0.415 |
Token | a |
Token ID | 257 |
Feature activation | +3.44e+00 |
Pos logit contributions | |
anut | +0.022 |
i | +0.022 |
ushed | +0.021 |
ising | +0.021 |
maid | +0.021 |
Neg logit contributions | |
There | -0.007 |
This | -0.007 |
On | -0.006 |
Under | -0.006 |
Circ | -0.006 |
Token | march |
Token ID | 9960 |
Feature activation | +4.43e+00 |
Pos logit contributions | |
ride | +0.072 |
university | +0.068 |
There | +0.066 |
visit | +0.066 |
Circ | +0.065 |
Neg logit contributions | |
bled | -0.114 |
i | -0.111 |
anut | -0.111 |
years | -0.110 |
bly | -0.108 |
Token | . |
Token ID | 13 |
Feature activation | +6.81e+00 |
Pos logit contributions | |
There | +0.106 |
Circ | +0.104 |
growth | +0.103 |
Where | +0.099 |
Across | +0.099 |
Neg logit contributions | |
impressed | -0.120 |
i | -0.120 |
bly | -0.119 |
anut | -0.116 |
ising | -0.116 |
Token | It |
Token ID | 632 |
Feature activation | +5.58e+00 |
Pos logit contributions | |
ork | +0.123 |
shortcut | +0.119 |
wheelchair | +0.116 |
aid | +0.114 |
hike | +0.110 |
Neg logit contributions | |
glad | -0.188 |
years | -0.183 |
gigg | -0.183 |
omo | -0.181 |
enthusiastic | -0.181 |
Token | the |
Token ID | 262 |
Feature activation | +7.56e+00 |
Pos logit contributions | |
This | +0.063 |
to | +0.061 |
There | +0.060 |
Circ | +0.060 |
ride | +0.060 |
Neg logit contributions | |
anut | -0.086 |
years | -0.082 |
impressed | -0.082 |
i | -0.082 |
irling | -0.080 |
Token | pumpkin |
Token ID | 30089 |
Feature activation | +7.60e+00 |
Pos logit contributions | |
This | +0.180 |
There | +0.178 |
ride | +0.175 |
Circ | +0.175 |
Across | +0.166 |
Neg logit contributions | |
anut | -0.214 |
i | -0.211 |
impressed | -0.205 |
irling | -0.203 |
bly | -0.203 |
Token | together |
Token ID | 1978 |
Feature activation | +5.06e-01 |
Pos logit contributions | |
This | +0.195 |
There | +0.193 |
ride | +0.190 |
Circ | +0.187 |
to | +0.184 |
Neg logit contributions | |
anut | -0.200 |
impressed | -0.192 |
i | -0.191 |
years | -0.191 |
irling | -0.188 |
Token | , |
Token ID | 11 |
Feature activation | -5.99e+00 |
Pos logit contributions | |
There | +0.011 |
Circ | +0.011 |
This | +0.011 |
Where | +0.011 |
Across | +0.011 |
Neg logit contributions | |
i | -0.016 |
anut | -0.015 |
impressed | -0.015 |
careful | -0.014 |
bled | -0.014 |
Token | until |
Token ID | 1566 |
Feature activation | -1.34e+00 |
Pos logit contributions | |
anut | +0.160 |
i | +0.152 |
bly | +0.149 |
bled | +0.148 |
years | +0.148 |
Neg logit contributions | |
There | -0.156 |
ride | -0.155 |
This | -0.155 |
to | -0.148 |
To | -0.148 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
anut | +0.044 |
i | +0.043 |
impressed | +0.042 |
bled | +0.041 |
bly | +0.041 |
Neg logit contributions | |
There | -0.027 |
This | -0.026 |
Circ | -0.025 |
Across | -0.025 |
Where | -0.024 |
Token | was |
Token ID | 373 |
Feature activation | -5.15e-01 |
Pos logit contributions | |
There | +0.435 |
This | +0.427 |
Circ | +0.423 |
Across | +0.411 |
To | +0.409 |
Neg logit contributions | |
i | -0.424 |
anut | -0.417 |
impressed | -0.402 |
bled | -0.393 |
bly | -0.387 |
Token | nothing |
Token ID | 2147 |
Feature activation | -8.82e-01 |
Pos logit contributions | |
anut | +0.016 |
i | +0.015 |
impressed | +0.015 |
bled | +0.015 |
bly | +0.015 |
Neg logit contributions | |
There | -0.011 |
This | -0.011 |
Circ | -0.011 |
Across | -0.011 |
Where | -0.011 |
Token | left |
Token ID | 1364 |
Feature activation | +6.15e+00 |
Pos logit contributions | |
impressed | +0.028 |
i | +0.028 |
anut | +0.028 |
careful | +0.027 |
pleased | +0.027 |
Neg logit contributions | |
There | -0.019 |
This | -0.018 |
Circ | -0.018 |
On | -0.018 |
Across | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | +6.60e+00 |
Pos logit contributions | |
Circ | +0.143 |
There | +0.140 |
Across | +0.135 |
This | +0.131 |
Where | +0.131 |
Neg logit contributions | |
i | -0.192 |
anut | -0.184 |
bled | -0.174 |
bly | -0.172 |
impressed | -0.171 |
Token | They |
Token ID | 1119 |
Feature activation | -5.59e-01 |
Pos logit contributions | |
Circ | +0.121 |
hike | +0.119 |
ride | +0.118 |
university | +0.114 |
shortcut | +0.114 |
Neg logit contributions | |
glad | -0.209 |
impressed | -0.205 |
i | -0.201 |
anut | -0.200 |
pleased | -0.199 |
Token | eddy |
Token ID | 21874 |
Feature activation | -3.58e+00 |
Pos logit contributions | |
anut | +0.107 |
i | +0.107 |
bly | +0.097 |
years | +0.097 |
bled | +0.097 |
Neg logit contributions | |
There | -0.149 |
This | -0.144 |
Circ | -0.140 |
Where | -0.139 |
Across | -0.139 |
Token | bear |
Token ID | 6842 |
Feature activation | +3.57e+00 |
Pos logit contributions | |
anut | +0.101 |
i | +0.100 |
impressed | +0.093 |
bly | +0.092 |
bled | +0.091 |
Neg logit contributions | |
There | -0.090 |
This | -0.087 |
Circ | -0.086 |
Across | -0.084 |
Where | -0.084 |
Token | with |
Token ID | 351 |
Feature activation | +3.78e+00 |
Pos logit contributions | |
There | +0.078 |
Circ | +0.077 |
This | +0.074 |
Where | +0.073 |
Across | +0.073 |
Neg logit contributions | |
i | -0.112 |
anut | -0.111 |
impressed | -0.105 |
careful | -0.102 |
bly | -0.102 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +9.99e+00 |
Pos logit contributions | |
There | +0.098 |
Circ | +0.096 |
This | +0.095 |
Across | +0.093 |
Where | +0.092 |
Neg logit contributions | |
anut | -0.099 |
impressed | -0.098 |
i | -0.098 |
glad | -0.095 |
careful | -0.094 |
Token | Once |
Token ID | 7454 |
Feature activation | -1.98e-01 |
Pos logit contributions | |
dictionary | +0.227 |
shortcut | +0.222 |
university | +0.215 |
ride | +0.215 |
grade | +0.213 |
Neg logit contributions | |
i | -0.261 |
aming | -0.252 |
gigg | -0.246 |
m | -0.227 |
years | -0.223 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
i | +0.005 |
impressed | +0.005 |
anut | +0.005 |
glad | +0.005 |
careful | +0.005 |
Neg logit contributions | |
Circ | -0.005 |
This | -0.005 |
growth | -0.005 |
There | -0.005 |
Across | -0.005 |
Token | was |
Token ID | 373 |
Feature activation | -6.55e-01 |
Pos logit contributions | |
There | +0.349 |
This | +0.344 |
Where | +0.323 |
ride | +0.321 |
To | +0.319 |
Neg logit contributions | |
anut | -0.535 |
i | -0.503 |
bly | -0.501 |
impressed | -0.499 |
years | -0.495 |
Token | a |
Token ID | 257 |
Feature activation | +3.51e+00 |
Pos logit contributions | |
anut | +0.028 |
i | +0.027 |
ushed | +0.027 |
maid | +0.027 |
bled | +0.027 |
Neg logit contributions | |
There | -0.008 |
This | -0.008 |
to | -0.007 |
On | -0.007 |
ride | -0.007 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.70e+00 |
Pos logit contributions | |
ride | +0.080 |
There | +0.076 |
university | +0.074 |
visit | +0.074 |
This | +0.074 |
Neg logit contributions | |
bled | -0.107 |
anut | -0.105 |
i | -0.104 |
years | -0.102 |
bly | -0.102 |
Token | named |
Token ID | 3706 |
Feature activation | -1.30e+00 |
Pos logit contributions | |
anut | +0.038 |
impressed | +0.036 |
bled | +0.035 |
i | +0.035 |
bly | +0.034 |
Neg logit contributions | |
There | -0.053 |
This | -0.052 |
ride | -0.051 |
To | -0.050 |
Where | -0.050 |
Token | Ana |
Token ID | 17639 |
Feature activation | -7.15e+00 |
Pos logit contributions | |
anut | +0.034 |
i | +0.034 |
careful | +0.032 |
glad | +0.032 |
years | +0.032 |
Neg logit contributions | |
Circ | -0.034 |
There | -0.032 |
Across | -0.031 |
ride | -0.031 |
growth | -0.031 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.02e+01 |
Pos logit contributions | |
glad | +0.237 |
impressed | +0.236 |
anut | +0.232 |
i | +0.230 |
careful | +0.229 |
Neg logit contributions | |
Circ | -0.163 |
growth | -0.155 |
Across | -0.147 |
There | -0.142 |
corner | -0.141 |
Token | Once |
Token ID | 7454 |
Feature activation | -2.39e-01 |
Pos logit contributions | |
dictionary | +0.211 |
university | +0.210 |
grade | +0.209 |
ride | +0.206 |
Circ | +0.205 |
Neg logit contributions | |
i | -0.282 |
aming | -0.270 |
gigg | -0.261 |
ll | -0.247 |
glad | -0.246 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.68e+00 |
Pos logit contributions | |
i | +0.006 |
impressed | +0.006 |
anut | +0.006 |
glad | +0.006 |
pleased | +0.006 |
Neg logit contributions | |
Circ | -0.006 |
growth | -0.006 |
This | -0.006 |
There | -0.006 |
Across | -0.006 |
Token | a |
Token ID | 257 |
Feature activation | +3.66e+00 |
Pos logit contributions | |
There | +0.173 |
This | +0.169 |
Circ | +0.168 |
Across | +0.162 |
To | +0.162 |
Neg logit contributions | |
anut | -0.224 |
impressed | -0.219 |
i | -0.216 |
careful | -0.213 |
glad | -0.212 |
Token | time |
Token ID | 640 |
Feature activation | +6.05e+00 |
Pos logit contributions | |
There | +0.034 |
This | +0.032 |
ride | +0.030 |
On | +0.029 |
Circ | +0.027 |
Neg logit contributions | |
anut | -0.161 |
bled | -0.158 |
i | -0.158 |
bly | -0.155 |
impressed | -0.155 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
There | +0.171 |
This | +0.168 |
Where | +0.159 |
To | +0.158 |
Circ | +0.158 |
Neg logit contributions | |
anut | -0.157 |
i | -0.148 |
impressed | -0.144 |
bly | -0.142 |
bled | -0.141 |
Token | was |
Token ID | 373 |
Feature activation | -4.71e-01 |
Pos logit contributions | |
There | +0.398 |
This | +0.390 |
Circ | +0.381 |
To | +0.371 |
Across | +0.371 |
Neg logit contributions | |
anut | -0.468 |
i | -0.454 |
impressed | -0.445 |
bled | -0.432 |
bly | -0.430 |
Token | a |
Token ID | 257 |
Feature activation | +3.59e+00 |
Pos logit contributions | |
anut | +0.020 |
i | +0.019 |
ushed | +0.019 |
impressed | +0.019 |
maid | +0.019 |
Neg logit contributions | |
There | -0.006 |
This | -0.006 |
On | -0.005 |
Under | -0.005 |
ride | -0.005 |
Token | knight |
Token ID | 22062 |
Feature activation | +2.87e+00 |
Pos logit contributions | |
ride | +0.078 |
university | +0.073 |
There | +0.073 |
visit | +0.072 |
Circ | +0.071 |
Neg logit contributions | |
bled | -0.116 |
anut | -0.113 |
i | -0.112 |
years | -0.111 |
bly | -0.110 |
Token | . |
Token ID | 13 |
Feature activation | +7.05e+00 |
Pos logit contributions | |
There | +0.083 |
Circ | +0.081 |
This | +0.079 |
Across | +0.079 |
Where | +0.078 |
Neg logit contributions | |
i | -0.071 |
anut | -0.071 |
impressed | -0.066 |
careful | -0.064 |
bly | -0.063 |
Token | He |
Token ID | 679 |
Feature activation | -1.87e+00 |
Pos logit contributions | |
ork | +0.216 |
shortcut | +0.182 |
wheelchair | +0.182 |
aid | +0.180 |
hike | +0.172 |
Neg logit contributions | |
grateful | -0.147 |
enthusiastic | -0.147 |
glad | -0.139 |
impressed | -0.139 |
happy | -0.138 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.03e-01 |
Pos logit contributions | |
university | +0.201 |
ride | +0.201 |
grade | +0.201 |
dictionary | +0.198 |
Circ | +0.198 |
Neg logit contributions | |
i | -0.276 |
aming | -0.271 |
gigg | -0.260 |
bled | -0.250 |
bly | -0.250 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.53e+00 |
Pos logit contributions | |
i | +0.008 |
impressed | +0.008 |
anut | +0.008 |
glad | +0.008 |
pleased | +0.007 |
Neg logit contributions | |
Circ | -0.008 |
growth | -0.008 |
This | -0.008 |
Across | -0.008 |
ride | -0.008 |
Token | a |
Token ID | 257 |
Feature activation | +3.58e+00 |
Pos logit contributions | |
There | +0.173 |
Circ | +0.169 |
This | +0.168 |
To | +0.163 |
Across | +0.162 |
Neg logit contributions | |
anut | -0.216 |
impressed | -0.211 |
i | -0.207 |
careful | -0.206 |
glad | -0.204 |
Token | time |
Token ID | 640 |
Feature activation | +5.98e+00 |
Pos logit contributions | |
There | +0.042 |
This | +0.040 |
Circ | +0.038 |
ride | +0.036 |
To | +0.036 |
Neg logit contributions | |
anut | -0.148 |
i | -0.144 |
impressed | -0.142 |
bled | -0.140 |
bly | -0.140 |
Token | , |
Token ID | 11 |
Feature activation | -5.80e+00 |
Pos logit contributions | |
There | +0.173 |
This | +0.170 |
Circ | +0.164 |
Where | +0.162 |
To | +0.162 |
Neg logit contributions | |
anut | -0.147 |
i | -0.140 |
impressed | -0.137 |
bly | -0.133 |
bled | -0.132 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
anut | +0.210 |
i | +0.204 |
impressed | +0.200 |
bled | +0.195 |
years | +0.195 |
Neg logit contributions | |
There | -0.102 |
This | -0.099 |
Where | -0.092 |
Across | -0.092 |
Circ | -0.091 |
Token | was |
Token ID | 373 |
Feature activation | -5.58e-01 |
Pos logit contributions | |
There | +0.406 |
This | +0.397 |
Circ | +0.392 |
To | +0.380 |
Across | +0.380 |
Neg logit contributions | |
anut | -0.455 |
i | -0.444 |
impressed | -0.435 |
bled | -0.422 |
bly | -0.418 |
Token | a |
Token ID | 257 |
Feature activation | +3.42e+00 |
Pos logit contributions | |
anut | +0.021 |
i | +0.021 |
impressed | +0.020 |
ising | +0.020 |
irling | +0.020 |
Neg logit contributions | |
There | -0.009 |
This | -0.009 |
Circ | -0.008 |
On | -0.008 |
ride | -0.008 |
Token | boy |
Token ID | 2933 |
Feature activation | -8.36e-01 |
Pos logit contributions | |
ride | +0.074 |
university | +0.072 |
There | +0.070 |
visit | +0.069 |
Circ | +0.068 |
Neg logit contributions | |
bled | -0.108 |
i | -0.107 |
anut | -0.107 |
years | -0.104 |
bly | -0.103 |
Token | named |
Token ID | 3706 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
anut | +0.020 |
bled | +0.020 |
impressed | +0.020 |
i | +0.019 |
bly | +0.019 |
Neg logit contributions | |
This | -0.024 |
There | -0.024 |
ride | -0.024 |
to | -0.023 |
To | -0.023 |
Token | Joe |
Token ID | 5689 |
Feature activation | -3.96e+00 |
Pos logit contributions | |
anut | +0.027 |
careful | +0.024 |
i | +0.024 |
impressed | +0.023 |
glad | +0.023 |
Neg logit contributions | |
ork | -0.044 |
Circ | -0.043 |
growth | -0.043 |
Across | -0.043 |
To | -0.043 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.30e-01 |
Pos logit contributions | |
grade | +0.227 |
university | +0.223 |
ride | +0.223 |
Circ | +0.219 |
hike | +0.218 |
Neg logit contributions | |
i | -0.229 |
aming | -0.224 |
gigg | -0.222 |
glad | -0.221 |
ll | -0.218 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.65e+00 |
Pos logit contributions | |
anut | +0.009 |
i | +0.009 |
impressed | +0.009 |
glad | +0.009 |
pleased | +0.008 |
Neg logit contributions | |
Circ | -0.008 |
There | -0.008 |
This | -0.008 |
Across | -0.008 |
growth | -0.008 |
Token | a |
Token ID | 257 |
Feature activation | +3.61e+00 |
Pos logit contributions | |
There | +0.177 |
This | +0.173 |
Circ | +0.171 |
Across | +0.165 |
Where | +0.165 |
Neg logit contributions | |
anut | -0.219 |
impressed | -0.215 |
careful | -0.210 |
i | -0.209 |
glad | -0.207 |
Token | time |
Token ID | 640 |
Feature activation | +6.00e+00 |
Pos logit contributions | |
There | +0.035 |
This | +0.034 |
ride | +0.031 |
On | +0.031 |
Circ | +0.030 |
Neg logit contributions | |
anut | -0.156 |
i | -0.153 |
bled | -0.152 |
impressed | -0.151 |
bly | -0.151 |
Token | , |
Token ID | 11 |
Feature activation | -5.83e+00 |
Pos logit contributions | |
There | +0.172 |
This | +0.169 |
Circ | +0.162 |
Where | +0.160 |
To | +0.160 |
Neg logit contributions | |
anut | -0.150 |
i | -0.143 |
impressed | -0.139 |
bly | -0.137 |
bled | -0.136 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
anut | +0.211 |
i | +0.205 |
impressed | +0.201 |
bled | +0.196 |
bly | +0.196 |
Neg logit contributions | |
There | -0.102 |
This | -0.099 |
Circ | -0.092 |
Where | -0.092 |
Across | -0.091 |
Token | was |
Token ID | 373 |
Feature activation | -6.07e-01 |
Pos logit contributions | |
There | +0.405 |
This | +0.397 |
Circ | +0.391 |
Across | +0.380 |
To | +0.380 |
Neg logit contributions | |
anut | -0.455 |
i | -0.444 |
impressed | -0.435 |
bled | -0.422 |
bly | -0.417 |
Token | a |
Token ID | 257 |
Feature activation | +3.49e+00 |
Pos logit contributions | |
anut | +0.024 |
i | +0.024 |
impressed | +0.023 |
ising | +0.023 |
ushed | +0.023 |
Neg logit contributions | |
There | -0.009 |
This | -0.009 |
Circ | -0.008 |
On | -0.008 |
Under | -0.008 |
Token | mom |
Token ID | 1995 |
Feature activation | -2.24e+00 |
Pos logit contributions | |
ride | +0.073 |
university | +0.069 |
There | +0.068 |
visit | +0.067 |
Circ | +0.066 |
Neg logit contributions | |
bled | -0.116 |
i | -0.114 |
anut | -0.113 |
years | -0.111 |
bly | -0.111 |
Token | my |
Token ID | 1820 |
Feature activation | -5.19e+00 |
Pos logit contributions | |
bled | +0.062 |
anut | +0.062 |
impressed | +0.060 |
bly | +0.060 |
irling | +0.060 |
Neg logit contributions | |
There | -0.056 |
This | -0.056 |
ride | -0.054 |
to | -0.053 |
visit | -0.052 |
Token | and |
Token ID | 290 |
Feature activation | -5.31e+00 |
Pos logit contributions | |
anut | +0.149 |
i | +0.144 |
impressed | +0.141 |
bled | +0.137 |
bly | +0.137 |
Neg logit contributions | |
There | -0.127 |
This | -0.124 |
Circ | -0.121 |
To | -0.118 |
ride | -0.118 |
Token | Once |
Token ID | 7454 |
Feature activation | -4.10e-01 |
Pos logit contributions | |
dictionary | +0.217 |
university | +0.215 |
grade | +0.212 |
ride | +0.207 |
shortcut | +0.206 |
Neg logit contributions | |
aming | -0.241 |
i | -0.240 |
gigg | -0.238 |
m | -0.230 |
ll | -0.227 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.60e+00 |
Pos logit contributions | |
i | +0.011 |
impressed | +0.011 |
anut | +0.011 |
glad | +0.010 |
pleased | +0.010 |
Neg logit contributions | |
Circ | -0.011 |
growth | -0.010 |
This | -0.010 |
There | -0.010 |
Across | -0.010 |
Token | a |
Token ID | 257 |
Feature activation | +3.60e+00 |
Pos logit contributions | |
There | +0.178 |
This | +0.174 |
Circ | +0.173 |
Across | +0.168 |
To | +0.167 |
Neg logit contributions | |
anut | -0.213 |
impressed | -0.210 |
careful | -0.206 |
i | -0.204 |
glad | -0.204 |
Token | time |
Token ID | 640 |
Feature activation | +5.99e+00 |
Pos logit contributions | |
There | +0.035 |
This | +0.034 |
ride | +0.031 |
On | +0.030 |
Circ | +0.030 |
Neg logit contributions | |
anut | -0.156 |
i | -0.153 |
bled | -0.152 |
impressed | -0.150 |
bly | -0.150 |
Token | , |
Token ID | 11 |
Feature activation | -5.80e+00 |
Pos logit contributions | |
There | +0.171 |
This | +0.169 |
Circ | +0.160 |
Where | +0.160 |
To | +0.159 |
Neg logit contributions | |
anut | -0.151 |
i | -0.144 |
impressed | -0.140 |
bly | -0.137 |
bled | -0.136 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
anut | +0.216 |
i | +0.209 |
impressed | +0.205 |
years | +0.200 |
irling | +0.200 |
Neg logit contributions | |
There | -0.099 |
This | -0.095 |
Where | -0.088 |
Across | -0.087 |
On | -0.086 |
Token | was |
Token ID | 373 |
Feature activation | -4.70e-01 |
Pos logit contributions | |
There | +0.414 |
This | +0.405 |
Circ | +0.402 |
Across | +0.390 |
To | +0.389 |
Neg logit contributions | |
anut | -0.441 |
i | -0.434 |
impressed | -0.425 |
bled | -0.410 |
bly | -0.405 |
Token | a |
Token ID | 257 |
Feature activation | +3.50e+00 |
Pos logit contributions | |
anut | +0.020 |
i | +0.020 |
ising | +0.019 |
ushed | +0.019 |
maid | +0.019 |
Neg logit contributions | |
There | -0.006 |
This | -0.006 |
On | -0.005 |
Under | -0.005 |
Circ | -0.005 |
Token | ch |
Token ID | 442 |
Feature activation | -3.27e+00 |
Pos logit contributions | |
ride | +0.073 |
university | +0.069 |
There | +0.067 |
visit | +0.067 |
Circ | +0.065 |
Neg logit contributions | |
bled | -0.117 |
i | -0.114 |
anut | -0.113 |
years | -0.112 |
bly | -0.111 |
Token | ubby |
Token ID | 38393 |
Feature activation | -6.84e+00 |
Pos logit contributions | |
anut | +0.043 |
impressed | +0.039 |
i | +0.038 |
bled | +0.036 |
bly | +0.034 |
Neg logit contributions | |
There | -0.131 |
This | -0.129 |
Circ | -0.126 |
ride | -0.126 |
university | -0.125 |
Token | little |
Token ID | 1310 |
Feature activation | -3.60e+00 |
Pos logit contributions | |
anut | +0.223 |
impressed | +0.215 |
i | +0.213 |
bled | +0.212 |
years | +0.211 |
Neg logit contributions | |
There | -0.139 |
This | -0.135 |
ride | -0.133 |
Circ | -0.130 |
university | -0.126 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +9.95e+00 |
Pos logit contributions | |
anut | +0.245 |
impressed | +0.243 |
i | +0.239 |
glad | +0.239 |
irling | +0.237 |
Neg logit contributions | |
Circ | -0.157 |
Across | -0.145 |
growth | -0.143 |
There | -0.143 |
Where | -0.138 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.73e-01 |
Pos logit contributions | |
university | +0.211 |
Circ | +0.209 |
grade | +0.208 |
dictionary | +0.204 |
ride | +0.204 |
Neg logit contributions | |
aming | -0.267 |
i | -0.264 |
gigg | -0.247 |
m | -0.240 |
bly | -0.236 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.59e+00 |
Pos logit contributions | |
i | +0.010 |
impressed | +0.010 |
anut | +0.010 |
glad | +0.010 |
pleased | +0.009 |
Neg logit contributions | |
Circ | -0.010 |
growth | -0.009 |
This | -0.009 |
There | -0.009 |
ride | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +3.68e+00 |
Pos logit contributions | |
There | +0.176 |
Circ | +0.174 |
This | +0.173 |
Across | +0.167 |
To | +0.166 |
Neg logit contributions | |
anut | -0.212 |
impressed | -0.211 |
careful | -0.205 |
glad | -0.204 |
i | -0.204 |
Token | time |
Token ID | 640 |
Feature activation | +5.94e+00 |
Pos logit contributions | |
There | +0.034 |
This | +0.033 |
ride | +0.031 |
On | +0.030 |
Circ | +0.029 |
Neg logit contributions | |
anut | -0.160 |
bled | -0.157 |
i | -0.157 |
impressed | -0.155 |
bly | -0.155 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
There | +0.166 |
This | +0.164 |
Where | +0.154 |
To | +0.154 |
Across | +0.153 |
Neg logit contributions | |
anut | -0.157 |
i | -0.148 |
impressed | -0.144 |
bly | -0.142 |
bled | -0.141 |
Token | was |
Token ID | 373 |
Feature activation | -3.88e-01 |
Pos logit contributions | |
There | +0.405 |
This | +0.397 |
Circ | +0.390 |
To | +0.380 |
Across | +0.379 |
Neg logit contributions | |
anut | -0.456 |
i | -0.446 |
impressed | -0.436 |
bled | -0.422 |
bly | -0.419 |
Token | a |
Token ID | 257 |
Feature activation | +3.68e+00 |
Pos logit contributions | |
anut | +0.017 |
i | +0.016 |
maid | +0.016 |
ushed | +0.016 |
ising | +0.016 |
Neg logit contributions | |
There | -0.005 |
This | -0.004 |
On | -0.004 |
Under | -0.004 |
ride | -0.004 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.65e+00 |
Pos logit contributions | |
ride | +0.078 |
university | +0.075 |
There | +0.073 |
visit | +0.072 |
Circ | +0.070 |
Neg logit contributions | |
bled | -0.122 |
anut | -0.117 |
i | -0.117 |
years | -0.115 |
bly | -0.114 |
Token | called |
Token ID | 1444 |
Feature activation | -9.70e-01 |
Pos logit contributions | |
anut | +0.038 |
bled | +0.037 |
impressed | +0.036 |
i | +0.035 |
bly | +0.035 |
Neg logit contributions | |
There | -0.050 |
This | -0.049 |
ride | -0.048 |
To | -0.047 |
Where | -0.047 |
Token | Emma |
Token ID | 18966 |
Feature activation | -6.23e+00 |
Pos logit contributions | |
i | +0.022 |
anut | +0.022 |
careful | +0.020 |
years | +0.020 |
glad | +0.020 |
Neg logit contributions | |
Circ | -0.027 |
Across | -0.026 |
ride | -0.026 |
There | -0.026 |
To | -0.025 |
Token | in |
Token ID | 287 |
Feature activation | +1.01e+01 |
Pos logit contributions | |
anut | +0.206 |
i | +0.201 |
impressed | +0.197 |
bled | +0.192 |
bly | +0.191 |
Neg logit contributions | |
There | -0.107 |
This | -0.104 |
Circ | -0.097 |
Across | -0.096 |
Where | -0.096 |
Token | a |
Token ID | 257 |
Feature activation | +3.44e+00 |
Pos logit contributions | |
There | +0.225 |
This | +0.221 |
Circ | +0.221 |
Where | +0.216 |
To | +0.214 |
Neg logit contributions | |
anut | -0.293 |
i | -0.286 |
careful | -0.282 |
impressed | -0.282 |
bly | -0.280 |
Token | small |
Token ID | 1402 |
Feature activation | -1.40e+00 |
Pos logit contributions | |
There | +0.079 |
This | +0.077 |
Circ | +0.075 |
ride | +0.073 |
Across | +0.073 |
Neg logit contributions | |
anut | -0.104 |
i | -0.102 |
impressed | -0.099 |
bled | -0.098 |
bly | -0.097 |
Token | town |
Token ID | 3240 |
Feature activation | +8.46e+00 |
Pos logit contributions | |
anut | +0.052 |
i | +0.052 |
impressed | +0.050 |
careful | +0.049 |
bly | +0.049 |
Neg logit contributions | |
There | -0.021 |
This | -0.021 |
Circ | -0.020 |
To | -0.019 |
Where | -0.019 |
Token | , |
Token ID | 11 |
Feature activation | -5.91e+00 |
Pos logit contributions | |
Circ | +0.266 |
There | +0.264 |
This | +0.256 |
To | +0.254 |
growth | +0.253 |
Neg logit contributions | |
anut | -0.175 |
impressed | -0.173 |
i | -0.172 |
glad | -0.162 |
careful | -0.161 |
Token | there |
Token ID | 612 |
Feature activation | +1.64e+01 |
Pos logit contributions | |
anut | +0.212 |
i | +0.206 |
impressed | +0.202 |
bled | +0.198 |
bly | +0.198 |
Neg logit contributions | |
There | -0.105 |
This | -0.102 |
Circ | -0.095 |
Where | -0.094 |
Across | -0.094 |
Token | was |
Token ID | 373 |
Feature activation | -5.08e-01 |
Pos logit contributions | |
There | +0.416 |
This | +0.407 |
Circ | +0.407 |
To | +0.393 |
Across | +0.391 |
Neg logit contributions | |
anut | -0.438 |
i | -0.431 |
impressed | -0.421 |
bled | -0.410 |
bly | -0.404 |
Token | a |
Token ID | 257 |
Feature activation | +3.37e+00 |
Pos logit contributions | |
anut | +0.016 |
impressed | +0.016 |
i | +0.016 |
bled | +0.015 |
bly | +0.015 |
Neg logit contributions | |
There | -0.010 |
This | -0.010 |
Circ | -0.010 |
Where | -0.010 |
To | -0.010 |
Token | big |
Token ID | 1263 |
Feature activation | -6.84e-02 |
Pos logit contributions | |
ride | +0.075 |
university | +0.074 |
There | +0.074 |
Circ | +0.071 |
This | +0.071 |
Neg logit contributions | |
anut | -0.104 |
i | -0.104 |
bled | -0.103 |
years | -0.099 |
bly | -0.099 |
Token | mill |
Token ID | 3939 |
Feature activation | +6.27e+00 |
Pos logit contributions | |
anut | +0.002 |
i | +0.002 |
impressed | +0.002 |
bled | +0.002 |
bly | +0.002 |
Neg logit contributions | |
There | -0.001 |
This | -0.001 |
ride | -0.001 |
Circ | -0.001 |
university | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +6.75e+00 |
Pos logit contributions | |
There | +0.149 |
Circ | +0.147 |
growth | +0.143 |
Where | +0.143 |
Across | +0.141 |
Neg logit contributions | |
anut | -0.183 |
impressed | -0.180 |
i | -0.178 |
careful | -0.173 |
bly | -0.172 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.57e-01 |
Pos logit contributions | |
grade | +0.208 |
Circ | +0.208 |
university | +0.207 |
ride | +0.207 |
dictionary | +0.207 |
Neg logit contributions | |
i | -0.271 |
aming | -0.263 |
gigg | -0.256 |
glad | -0.239 |
ll | -0.238 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.59e+00 |
Pos logit contributions | |
i | +0.010 |
impressed | +0.009 |
anut | +0.009 |
glad | +0.009 |
pleased | +0.009 |
Neg logit contributions | |
Circ | -0.009 |
growth | -0.009 |
This | -0.009 |
There | -0.009 |
ride | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +3.63e+00 |
Pos logit contributions | |
There | +0.172 |
Circ | +0.169 |
This | +0.169 |
Across | +0.162 |
To | +0.162 |
Neg logit contributions | |
anut | -0.218 |
impressed | -0.215 |
i | -0.211 |
careful | -0.210 |
glad | -0.207 |
Token | time |
Token ID | 640 |
Feature activation | +5.93e+00 |
Pos logit contributions | |
There | +0.037 |
This | +0.035 |
ride | +0.032 |
Circ | +0.032 |
On | +0.032 |
Neg logit contributions | |
anut | -0.156 |
i | -0.153 |
bled | -0.151 |
impressed | -0.150 |
bly | -0.149 |
Token | , |
Token ID | 11 |
Feature activation | -5.83e+00 |
Pos logit contributions | |
There | +0.168 |
This | +0.166 |
Where | +0.157 |
To | +0.157 |
Circ | +0.156 |
Neg logit contributions | |
anut | -0.152 |
i | -0.143 |
impressed | -0.140 |
bly | -0.137 |
bled | -0.137 |
Token | there |
Token ID | 612 |
Feature activation | +1.63e+01 |
Pos logit contributions | |
anut | +0.219 |
i | +0.211 |
impressed | +0.207 |
years | +0.203 |
irling | +0.202 |
Neg logit contributions | |
There | -0.099 |
This | -0.094 |
Where | -0.087 |
Across | -0.087 |
On | -0.086 |
Token | was |
Token ID | 373 |
Feature activation | -4.75e-01 |
Pos logit contributions | |
There | +0.397 |
This | +0.389 |
Circ | +0.379 |
To | +0.370 |
Across | +0.369 |
Neg logit contributions | |
anut | -0.468 |
i | -0.454 |
impressed | -0.446 |
bled | -0.433 |
bly | -0.431 |
Token | a |
Token ID | 257 |
Feature activation | +3.53e+00 |
Pos logit contributions | |
anut | +0.020 |
i | +0.019 |
ising | +0.019 |
impressed | +0.019 |
ushed | +0.018 |
Neg logit contributions | |
There | -0.006 |
This | -0.006 |
On | -0.006 |
Circ | -0.006 |
Under | -0.006 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.71e+00 |
Pos logit contributions | |
ride | +0.075 |
university | +0.072 |
There | +0.070 |
visit | +0.069 |
Circ | +0.067 |
Neg logit contributions | |
bled | -0.116 |
i | -0.113 |
anut | -0.113 |
years | -0.111 |
bly | -0.109 |
Token | . |
Token ID | 13 |
Feature activation | +7.17e+00 |
Pos logit contributions | |
anut | +0.042 |
bled | +0.041 |
impressed | +0.040 |
bly | +0.038 |
i | +0.038 |
Neg logit contributions | |
There | -0.049 |
This | -0.049 |
ride | -0.047 |
to | -0.047 |
To | -0.046 |
Token | She |
Token ID | 1375 |
Feature activation | -2.61e+00 |
Pos logit contributions | |
shortcut | +0.197 |
ork | +0.189 |
Circ | +0.184 |
hike | +0.182 |
ride | +0.175 |
Neg logit contributions | |
gigg | -0.162 |
glad | -0.160 |
i | -0.155 |
enthusiastic | -0.152 |
years | -0.152 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.00e+01 |
Pos logit contributions | |
Circ | +0.106 |
ride | +0.102 |
university | +0.102 |
hike | +0.100 |
Across | +0.098 |
Neg logit contributions | |
anut | -0.212 |
glad | -0.209 |
impressed | -0.206 |
i | -0.204 |
thankful | -0.197 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.67e-01 |
Pos logit contributions | |
university | +0.220 |
grade | +0.215 |
dictionary | +0.214 |
ride | +0.214 |
hike | +0.211 |
Neg logit contributions | |
aming | -0.252 |
i | -0.239 |
gigg | -0.238 |
bled | -0.231 |
glad | -0.229 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.48e+00 |
Pos logit contributions | |
anut | +0.010 |
i | +0.010 |
impressed | +0.010 |
glad | +0.010 |
bled | +0.009 |
Neg logit contributions | |
Circ | -0.009 |
There | -0.009 |
This | -0.009 |
growth | -0.009 |
Across | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +3.59e+00 |
Pos logit contributions | |
There | +0.168 |
This | +0.164 |
Circ | +0.162 |
Across | +0.157 |
Where | +0.156 |
Neg logit contributions | |
anut | -0.221 |
impressed | -0.216 |
i | -0.211 |
careful | -0.210 |
glad | -0.208 |
Token | time |
Token ID | 640 |
Feature activation | +5.88e+00 |
Pos logit contributions | |
There | +0.033 |
This | +0.032 |
ride | +0.029 |
On | +0.029 |
Circ | +0.028 |
Neg logit contributions | |
anut | -0.156 |
i | -0.154 |
bled | -0.154 |
bly | -0.152 |
impressed | -0.152 |
Token | there |
Token ID | 612 |
Feature activation | +1.63e+01 |
Pos logit contributions | |
There | +0.169 |
This | +0.166 |
Circ | +0.159 |
Where | +0.158 |
To | +0.157 |
Neg logit contributions | |
anut | -0.147 |
i | -0.140 |
impressed | -0.136 |
bly | -0.133 |
bled | -0.133 |
Token | was |
Token ID | 373 |
Feature activation | -5.64e-01 |
Pos logit contributions | |
There | +0.396 |
This | +0.389 |
Circ | +0.379 |
To | +0.370 |
Across | +0.370 |
Neg logit contributions | |
anut | -0.467 |
i | -0.453 |
impressed | -0.444 |
bled | -0.431 |
bly | -0.429 |
Token | a |
Token ID | 257 |
Feature activation | +3.60e+00 |
Pos logit contributions | |
anut | +0.023 |
i | +0.023 |
ushed | +0.022 |
ising | +0.022 |
impressed | +0.022 |
Neg logit contributions | |
There | -0.008 |
This | -0.007 |
On | -0.007 |
Circ | -0.007 |
ride | -0.007 |
Token | young |
Token ID | 1862 |
Feature activation | -5.03e+00 |
Pos logit contributions | |
ride | +0.077 |
university | +0.072 |
There | +0.072 |
visit | +0.071 |
Circ | +0.070 |
Neg logit contributions | |
bled | -0.117 |
i | -0.114 |
anut | -0.114 |
years | -0.113 |
bly | -0.112 |
Token | man |
Token ID | 582 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
anut | +0.160 |
bled | +0.153 |
i | +0.152 |
impressed | +0.150 |
years | +0.150 |
Neg logit contributions | |
There | -0.106 |
ride | -0.104 |
This | -0.103 |
Circ | -0.101 |
university | -0.099 |
Token | , |
Token ID | 11 |
Feature activation | -5.93e+00 |
Pos logit contributions | |
There | +0.051 |
Circ | +0.050 |
This | +0.050 |
Across | +0.049 |
Where | +0.049 |
Neg logit contributions | |
anut | -0.036 |
i | -0.036 |
impressed | -0.033 |
careful | -0.032 |
years | -0.032 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.00e+01 |
Pos logit contributions | |
bly | +0.055 |
irling | +0.054 |
aming | +0.054 |
anut | +0.053 |
ising | +0.051 |
Neg logit contributions | |
ride | -0.054 |
Circ | -0.054 |
There | -0.053 |
Where | -0.052 |
to | -0.052 |
Token | Once |
Token ID | 7454 |
Feature activation | -1.69e-01 |
Pos logit contributions | |
dictionary | +0.214 |
shortcut | +0.213 |
ride | +0.210 |
Circ | +0.210 |
grade | +0.204 |
Neg logit contributions | |
aming | -0.273 |
i | -0.270 |
gigg | -0.246 |
m | -0.241 |
bly | -0.240 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.57e+00 |
Pos logit contributions | |
i | +0.005 |
anut | +0.005 |
impressed | +0.005 |
glad | +0.004 |
careful | +0.004 |
Neg logit contributions | |
Circ | -0.004 |
This | -0.004 |
There | -0.004 |
growth | -0.004 |
Across | -0.004 |
Token | a |
Token ID | 257 |
Feature activation | +3.58e+00 |
Pos logit contributions | |
There | +0.173 |
Circ | +0.172 |
This | +0.171 |
Across | +0.163 |
Where | +0.163 |
Neg logit contributions | |
anut | -0.213 |
impressed | -0.211 |
i | -0.206 |
careful | -0.206 |
glad | -0.205 |
Token | time |
Token ID | 640 |
Feature activation | +5.99e+00 |
Pos logit contributions | |
There | +0.035 |
This | +0.033 |
ride | +0.030 |
On | +0.030 |
Circ | +0.029 |
Neg logit contributions | |
anut | -0.155 |
i | -0.152 |
bled | -0.151 |
impressed | -0.149 |
bly | -0.149 |
Token | there |
Token ID | 612 |
Feature activation | +1.63e+01 |
Pos logit contributions | |
There | +0.174 |
This | +0.171 |
Circ | +0.166 |
To | +0.164 |
Where | +0.164 |
Neg logit contributions | |
anut | -0.145 |
i | -0.138 |
impressed | -0.135 |
bled | -0.131 |
bly | -0.131 |
Token | was |
Token ID | 373 |
Feature activation | -5.85e-01 |
Pos logit contributions | |
There | +0.380 |
This | +0.373 |
Circ | +0.357 |
Where | +0.352 |
To | +0.351 |
Neg logit contributions | |
anut | -0.491 |
i | -0.471 |
impressed | -0.464 |
bly | -0.452 |
bled | -0.452 |
Token | a |
Token ID | 257 |
Feature activation | +3.53e+00 |
Pos logit contributions | |
anut | +0.024 |
i | +0.024 |
impressed | +0.023 |
ushed | +0.023 |
ising | +0.023 |
Neg logit contributions | |
There | -0.008 |
This | -0.007 |
On | -0.007 |
Under | -0.007 |
ride | -0.006 |
Token | mom |
Token ID | 1995 |
Feature activation | -2.35e+00 |
Pos logit contributions | |
ride | +0.077 |
university | +0.074 |
There | +0.073 |
visit | +0.072 |
Circ | +0.070 |
Neg logit contributions | |
bled | -0.112 |
anut | -0.110 |
i | -0.109 |
years | -0.107 |
bly | -0.106 |
Token | my |
Token ID | 1820 |
Feature activation | -5.21e+00 |
Pos logit contributions | |
anut | +0.066 |
bled | +0.066 |
impressed | +0.064 |
bly | +0.063 |
irling | +0.063 |
Neg logit contributions | |
There | -0.059 |
This | -0.058 |
ride | -0.057 |
to | -0.056 |
visit | -0.054 |
Token | and |
Token ID | 290 |
Feature activation | -5.23e+00 |
Pos logit contributions | |
anut | +0.153 |
i | +0.150 |
impressed | +0.144 |
bly | +0.139 |
careful | +0.138 |
Neg logit contributions | |
There | -0.125 |
This | -0.122 |
Circ | -0.121 |
Across | -0.118 |
Where | -0.117 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.00e+01 |
Pos logit contributions | |
anut | +0.171 |
i | +0.163 |
impressed | +0.160 |
bled | +0.157 |
bly | +0.156 |
Neg logit contributions | |
There | -0.161 |
This | -0.158 |
Circ | -0.152 |
ride | -0.151 |
To | -0.150 |
Token | Once |
Token ID | 7454 |
Feature activation | -3.61e-01 |
Pos logit contributions | |
dictionary | +0.216 |
Circ | +0.210 |
grade | +0.209 |
ride | +0.209 |
shortcut | +0.206 |
Neg logit contributions | |
aming | -0.273 |
i | -0.269 |
gigg | -0.248 |
bly | -0.241 |
m | -0.240 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.55e+00 |
Pos logit contributions | |
i | +0.010 |
impressed | +0.009 |
anut | +0.009 |
glad | +0.009 |
pleased | +0.009 |
Neg logit contributions | |
Circ | -0.010 |
growth | -0.009 |
This | -0.009 |
ride | -0.009 |
There | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +3.68e+00 |
Pos logit contributions | |
There | +0.171 |
Circ | +0.167 |
This | +0.167 |
Across | +0.161 |
To | +0.161 |
Neg logit contributions | |
anut | -0.218 |
impressed | -0.214 |
careful | -0.210 |
i | -0.209 |
glad | -0.208 |
Token | time |
Token ID | 640 |
Feature activation | +5.99e+00 |
Pos logit contributions | |
There | +0.038 |
This | +0.036 |
ride | +0.033 |
Circ | +0.032 |
On | +0.032 |
Neg logit contributions | |
anut | -0.158 |
i | -0.155 |
bled | -0.153 |
impressed | -0.152 |
bly | -0.151 |
Token | there |
Token ID | 612 |
Feature activation | +1.63e+01 |
Pos logit contributions | |
There | +0.168 |
This | +0.165 |
Where | +0.156 |
To | +0.155 |
Across | +0.155 |
Neg logit contributions | |
anut | -0.157 |
i | -0.147 |
impressed | -0.145 |
bly | -0.142 |
bled | -0.141 |
Token | was |
Token ID | 373 |
Feature activation | -4.55e-01 |
Pos logit contributions | |
There | +0.416 |
This | +0.409 |
Circ | +0.406 |
To | +0.393 |
Across | +0.392 |
Neg logit contributions | |
anut | -0.436 |
i | -0.431 |
impressed | -0.420 |
bled | -0.405 |
bly | -0.402 |
Token | a |
Token ID | 257 |
Feature activation | +3.66e+00 |
Pos logit contributions | |
anut | +0.019 |
i | +0.019 |
ushed | +0.018 |
impressed | +0.018 |
maid | +0.018 |
Neg logit contributions | |
There | -0.006 |
This | -0.006 |
On | -0.005 |
Under | -0.005 |
ride | -0.005 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.72e+00 |
Pos logit contributions | |
ride | +0.080 |
university | +0.075 |
There | +0.074 |
visit | +0.074 |
Circ | +0.071 |
Neg logit contributions | |
bled | -0.119 |
anut | -0.115 |
i | -0.114 |
years | -0.112 |
bly | -0.112 |
Token | named |
Token ID | 3706 |
Feature activation | -1.46e+00 |
Pos logit contributions | |
anut | +0.039 |
bled | +0.037 |
impressed | +0.037 |
i | +0.036 |
bly | +0.035 |
Neg logit contributions | |
There | -0.052 |
This | -0.052 |
ride | -0.050 |
To | -0.050 |
Where | -0.049 |
Token | Maria |
Token ID | 14200 |
Feature activation | -3.89e+00 |
Pos logit contributions | |
anut | +0.036 |
i | +0.035 |
careful | +0.033 |
years | +0.033 |
glad | +0.032 |
Neg logit contributions | |
Circ | -0.040 |
growth | -0.039 |
ride | -0.038 |
There | -0.038 |
Across | -0.038 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.02e+01 |
Pos logit contributions | |
anut | +0.263 |
impressed | +0.261 |
glad | +0.258 |
i | +0.255 |
careful | +0.255 |
Neg logit contributions | |
Circ | -0.151 |
Across | -0.136 |
growth | -0.136 |
There | -0.134 |
university | -0.130 |
Token | Once |
Token ID | 7454 |
Feature activation | -1.81e-01 |
Pos logit contributions | |
grade | +0.211 |
university | +0.208 |
dictionary | +0.207 |
Circ | +0.205 |
ride | +0.205 |
Neg logit contributions | |
i | -0.271 |
aming | -0.267 |
gigg | -0.257 |
glad | -0.245 |
efully | -0.243 |
Token | upon |
Token ID | 2402 |
Feature activation | +7.54e+00 |
Pos logit contributions | |
i | +0.005 |
impressed | +0.005 |
anut | +0.005 |
glad | +0.005 |
pleased | +0.004 |
Neg logit contributions | |
Circ | -0.005 |
growth | -0.005 |
This | -0.005 |
There | -0.005 |
Across | -0.004 |
Token | a |
Token ID | 257 |
Feature activation | +3.47e+00 |
Pos logit contributions | |
There | +0.173 |
This | +0.169 |
Circ | +0.169 |
Across | +0.162 |
To | +0.162 |
Neg logit contributions | |
anut | -0.216 |
impressed | -0.212 |
careful | -0.207 |
i | -0.206 |
glad | -0.205 |
Token | time |
Token ID | 640 |
Feature activation | +5.92e+00 |
Pos logit contributions | |
There | +0.039 |
This | +0.037 |
Circ | +0.034 |
ride | +0.034 |
Across | +0.033 |
Neg logit contributions | |
anut | -0.145 |
i | -0.142 |
impressed | -0.140 |
bled | -0.139 |
bly | -0.138 |
Token | there |
Token ID | 612 |
Feature activation | +1.63e+01 |
Pos logit contributions | |
There | +0.171 |
This | +0.168 |
Circ | +0.161 |
Where | +0.160 |
To | +0.160 |
Neg logit contributions | |
anut | -0.147 |
i | -0.139 |
impressed | -0.136 |
bly | -0.133 |
bled | -0.132 |
Token | was |
Token ID | 373 |
Feature activation | -6.16e-01 |
Pos logit contributions | |
There | +0.409 |
This | +0.401 |
Circ | +0.396 |
To | +0.384 |
Across | +0.384 |
Neg logit contributions | |
anut | -0.447 |
i | -0.438 |
impressed | -0.428 |
bled | -0.413 |
bly | -0.410 |
Token | a |
Token ID | 257 |
Feature activation | +3.40e+00 |
Pos logit contributions | |
anut | +0.026 |
i | +0.025 |
ushed | +0.024 |
impressed | +0.024 |
maid | +0.024 |
Neg logit contributions | |
There | -0.008 |
This | -0.008 |
On | -0.007 |
Under | -0.007 |
Circ | -0.007 |
Token | little |
Token ID | 1310 |
Feature activation | -3.72e+00 |
Pos logit contributions | |
ride | +0.076 |
university | +0.072 |
There | +0.072 |
visit | +0.070 |
Circ | +0.069 |
Neg logit contributions | |
bled | -0.107 |
anut | -0.105 |
i | -0.105 |
years | -0.102 |
bly | -0.102 |
Token | boy |
Token ID | 2933 |
Feature activation | -8.00e-01 |
Pos logit contributions | |
bled | +0.128 |
anut | +0.127 |
irling | +0.123 |
impressed | +0.122 |
years | +0.121 |
Neg logit contributions | |
ride | -0.073 |
There | -0.070 |
Circ | -0.068 |
This | -0.067 |
university | -0.067 |
Token | . |
Token ID | 13 |
Feature activation | +6.94e+00 |
Pos logit contributions | |
anut | +0.018 |
bled | +0.017 |
impressed | +0.017 |
i | +0.017 |
bly | +0.017 |
Neg logit contributions | |
This | -0.024 |
There | -0.024 |
ride | -0.023 |
To | -0.023 |
Where | -0.023 |
Token | and |
Token ID | 290 |
Feature activation | -1.15e+01 |
Pos logit contributions | |
impressed | +0.363 |
anut | +0.362 |
i | +0.357 |
glad | +0.348 |
careful | +0.348 |
Neg logit contributions | |
Circ | -0.306 |
There | -0.298 |
This | -0.296 |
growth | -0.288 |
Across | -0.286 |
Token | listening |
Token ID | 8680 |
Feature activation | -7.98e+00 |
Pos logit contributions | |
anut | +0.328 |
i | +0.317 |
impressed | +0.311 |
bled | +0.302 |
bly | +0.302 |
Neg logit contributions | |
There | -0.279 |
This | -0.274 |
Circ | -0.265 |
To | -0.260 |
Where | -0.260 |
Token | . |
Token ID | 13 |
Feature activation | +7.96e-01 |
Pos logit contributions | |
impressed | +0.283 |
i | +0.282 |
anut | +0.281 |
careful | +0.275 |
bly | +0.271 |
Neg logit contributions | |
There | -0.133 |
Circ | -0.126 |
Where | -0.121 |
This | -0.120 |
Across | -0.119 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +4.52e+00 |
Pos logit contributions | |
Circ | +0.015 |
ride | +0.015 |
ork | +0.014 |
hike | +0.014 |
shortcut | +0.014 |
Neg logit contributions | |
anut | -0.024 |
i | -0.024 |
glad | -0.024 |
impressed | -0.024 |
years | -0.023 |
Token | M |
Token ID | 44 |
Feature activation | -1.17e+01 |
Pos logit contributions | |
ride | +0.104 |
Find | +0.104 |
hike | +0.103 |
grade | +0.103 |
Circ | +0.102 |
Neg logit contributions | |
glad | -0.099 |
gigg | -0.097 |
i | -0.095 |
bled | -0.093 |
aming | -0.093 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.31e+01 |
Pos logit contributions | |
anut | +0.273 |
impressed | +0.262 |
bly | +0.257 |
bled | +0.257 |
i | +0.250 |
Neg logit contributions | |
There | -0.350 |
This | -0.345 |
ride | -0.330 |
On | -0.330 |
To | -0.330 |
Token | a |
Token ID | 64 |
Feature activation | -1.10e+01 |
Pos logit contributions | |
irling | +0.534 |
bly | +0.479 |
bled | +0.472 |
ising | +0.468 |
ishly | +0.462 |
Neg logit contributions | |
to | -0.653 |
| -0.642 |
ray | -0.585 |
ork | -0.576 |
ride | -0.575 |
Token | and |
Token ID | 290 |
Feature activation | -8.94e+00 |
Pos logit contributions | |
i | +0.251 |
anut | +0.248 |
impressed | +0.231 |
careful | +0.219 |
irling | +0.217 |
Neg logit contributions | |
There | -0.333 |
Circ | -0.329 |
This | -0.324 |
Where | -0.320 |
Across | -0.320 |
Token | Billy |
Token ID | 15890 |
Feature activation | -6.45e+00 |
Pos logit contributions | |
anut | +0.245 |
i | +0.240 |
impressed | +0.233 |
bled | +0.225 |
bly | +0.224 |
Neg logit contributions | |
There | -0.226 |
This | -0.223 |
Circ | -0.219 |
Across | -0.214 |
To | -0.213 |
Token | were |
Token ID | 547 |
Feature activation | -4.72e+00 |
Pos logit contributions | |
i | +0.188 |
anut | +0.183 |
impressed | +0.169 |
gigg | +0.167 |
glad | +0.163 |
Neg logit contributions | |
This | -0.153 |
There | -0.153 |
Circ | -0.153 |
Across | -0.147 |
Where | -0.147 |
Token | walking |
Token ID | 6155 |
Feature activation | +4.50e+00 |
Pos logit contributions | |
anut | +0.147 |
i | +0.142 |
ushed | +0.139 |
bly | +0.138 |
ising | +0.138 |
Neg logit contributions | |
There | -0.109 |
This | -0.104 |
to | -0.100 |
To | -0.100 |
university | -0.098 |
Token | Bear |
Token ID | 14732 |
Feature activation | -9.20e+00 |
Pos logit contributions | |
anut | +0.376 |
i | +0.365 |
impressed | +0.358 |
bled | +0.348 |
bly | +0.348 |
Neg logit contributions | |
There | -0.252 |
This | -0.246 |
Circ | -0.238 |
Across | -0.232 |
To | -0.232 |
Token | said |
Token ID | 531 |
Feature activation | -7.47e+00 |
Pos logit contributions | |
anut | +0.312 |
i | +0.304 |
impressed | +0.297 |
bly | +0.290 |
bled | +0.289 |
Neg logit contributions | |
There | -0.177 |
This | -0.172 |
Circ | -0.167 |
Where | -0.162 |
Across | -0.162 |
Token | , |
Token ID | 11 |
Feature activation | -9.93e+00 |
Pos logit contributions | |
anut | +0.217 |
i | +0.208 |
impressed | +0.204 |
bled | +0.197 |
bly | +0.197 |
Neg logit contributions | |
There | -0.181 |
This | -0.179 |
Circ | -0.170 |
To | -0.169 |
Where | -0.169 |
Token | " |
Token ID | 366 |
Feature activation | +2.65e+00 |
Pos logit contributions | |
anut | +0.377 |
i | +0.371 |
impressed | +0.369 |
careful | +0.357 |
bly | +0.357 |
Neg logit contributions | |
There | -0.142 |
This | -0.135 |
Circ | -0.135 |
Across | -0.128 |
Where | -0.126 |
Token | M |
Token ID | 44 |
Feature activation | -1.04e+01 |
Pos logit contributions | |
There | +0.063 |
This | +0.061 |
Circ | +0.059 |
Across | +0.058 |
To | +0.058 |
Neg logit contributions | |
anut | -0.078 |
i | -0.076 |
impressed | -0.074 |
bly | -0.073 |
bled | -0.072 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.12e+01 |
Pos logit contributions | |
anut | +0.198 |
impressed | +0.196 |
bled | +0.175 |
years | +0.175 |
irling | +0.169 |
Neg logit contributions | |
There | -0.360 |
This | -0.354 |
On | -0.347 |
To | -0.340 |
Across | -0.337 |
Token | a |
Token ID | 64 |
Feature activation | -9.15e+00 |
Pos logit contributions | |
irling | +0.462 |
impressed | +0.435 |
bled | +0.423 |
anut | +0.418 |
bly | +0.402 |
Neg logit contributions | |
This | -0.643 |
to | -0.642 |
There | -0.630 |
| -0.618 |
Where | -0.609 |
Token | Bear |
Token ID | 14732 |
Feature activation | -7.88e+00 |
Pos logit contributions | |
anut | +0.234 |
i | +0.224 |
impressed | +0.222 |
bled | +0.216 |
bly | +0.214 |
Neg logit contributions | |
There | -0.249 |
This | -0.247 |
Circ | -0.237 |
To | -0.234 |
ride | -0.234 |
Token | , |
Token ID | 11 |
Feature activation | -8.64e+00 |
Pos logit contributions | |
anut | +0.218 |
impressed | +0.210 |
i | +0.207 |
bled | +0.204 |
years | +0.202 |
Neg logit contributions | |
There | -0.197 |
This | -0.197 |
Circ | -0.185 |
To | -0.185 |
ride | -0.185 |
Token | it |
Token ID | 340 |
Feature activation | +4.64e+00 |
Pos logit contributions | |
anut | +0.267 |
i | +0.265 |
impressed | +0.255 |
careful | +0.252 |
bly | +0.248 |
Neg logit contributions | |
There | -0.184 |
Circ | -0.183 |
This | -0.177 |
Across | -0.176 |
On | -0.173 |
Token | looks |
Token ID | 3073 |
Feature activation | -5.32e+00 |
Pos logit contributions | |
There | +0.092 |
This | +0.090 |
Circ | +0.087 |
To | +0.084 |
Where | +0.084 |
Neg logit contributions | |
anut | -0.154 |
i | -0.149 |
impressed | -0.147 |
bled | -0.143 |
bly | -0.143 |
Token | 't |
Token ID | 470 |
Feature activation | -1.12e+01 |
Pos logit contributions | |
i | +0.419 |
anut | +0.408 |
bly | +0.401 |
impressed | +0.401 |
bled | +0.400 |
Neg logit contributions | |
Circ | -0.243 |
There | -0.233 |
Across | -0.230 |
This | -0.219 |
growth | -0.217 |
Token | cry |
Token ID | 3960 |
Feature activation | -1.24e+01 |
Pos logit contributions | |
anut | +0.273 |
i | +0.257 |
impressed | +0.256 |
bled | +0.250 |
years | +0.247 |
Neg logit contributions | |
There | -0.324 |
This | -0.320 |
ride | -0.312 |
Circ | -0.307 |
Where | -0.306 |
Token | , |
Token ID | 11 |
Feature activation | -1.29e+01 |
Pos logit contributions | |
anut | +0.367 |
impressed | +0.343 |
years | +0.341 |
irling | +0.331 |
bled | +0.331 |
Neg logit contributions | |
This | -0.304 |
There | -0.301 |
to | -0.297 |
Where | -0.287 |
ride | -0.280 |
Token | boys |
Token ID | 6510 |
Feature activation | -3.54e+00 |
Pos logit contributions | |
anut | +0.371 |
i | +0.370 |
impressed | +0.365 |
careful | +0.364 |
glad | +0.359 |
Neg logit contributions | |
Circ | -0.292 |
This | -0.288 |
Across | -0.288 |
There | -0.287 |
On | -0.279 |
Token | . |
Token ID | 13 |
Feature activation | +1.66e+00 |
Pos logit contributions | |
anut | +0.126 |
impressed | +0.121 |
i | +0.120 |
bled | +0.119 |
years | +0.119 |
Neg logit contributions | |
There | -0.062 |
This | -0.062 |
ride | -0.057 |
To | -0.056 |
Where | -0.056 |
Token | Acc |
Token ID | 6366 |
Feature activation | -2.12e+01 |
Pos logit contributions | |
There | +0.027 |
This | +0.027 |
Circ | +0.026 |
ride | +0.025 |
Across | +0.025 |
Neg logit contributions | |
anut | -0.060 |
i | -0.058 |
impressed | -0.058 |
bly | -0.056 |
bled | -0.056 |
Token | idents |
Token ID | 3231 |
Feature activation | -1.27e+01 |
Pos logit contributions | |
years | +0.597 |
bly | +0.553 |
anut | +0.550 |
aming | +0.541 |
impressed | +0.515 |
Neg logit contributions | |
This | -0.590 |
to | -0.545 |
There | -0.532 |
Where | -0.532 |
ride | -0.513 |
Token | happen |
Token ID | 1645 |
Feature activation | -2.00e+00 |
Pos logit contributions | |
anut | +0.359 |
impressed | +0.358 |
i | +0.350 |
bly | +0.346 |
bled | +0.343 |
Neg logit contributions | |
There | -0.303 |
Circ | -0.302 |
growth | -0.290 |
Across | -0.287 |
This | -0.286 |
Token | . |
Token ID | 13 |
Feature activation | +2.91e+00 |
Pos logit contributions | |
years | +0.083 |
anut | +0.083 |
impressed | +0.079 |
i | +0.079 |
irling | +0.077 |
Neg logit contributions | |
to | -0.025 |
This | -0.024 |
There | -0.022 |
Where | -0.020 |
To | -0.019 |
Token | I |
Token ID | 314 |
Feature activation | +2.01e+00 |
Pos logit contributions | |
There | +0.049 |
This | +0.048 |
Where | +0.044 |
To | +0.043 |
On | +0.041 |
Neg logit contributions | |
anut | -0.112 |
i | -0.108 |
impressed | -0.106 |
bled | -0.105 |
irling | -0.105 |
Token | have |
Token ID | 423 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
There | +0.039 |
ride | +0.039 |
This | +0.038 |
to | +0.037 |
To | +0.037 |
Neg logit contributions | |
anut | -0.068 |
years | -0.065 |
i | -0.065 |
bled | -0.065 |
bly | -0.064 |
Token | their |
Token ID | 511 |
Feature activation | -5.66e+00 |
Pos logit contributions | |
This | +0.197 |
There | +0.191 |
To | +0.186 |
ride | +0.182 |
On | +0.180 |
Neg logit contributions | |
anut | -0.233 |
i | -0.223 |
impressed | -0.219 |
years | -0.216 |
bled | -0.212 |
Token | faces |
Token ID | 6698 |
Feature activation | +1.03e+00 |
Pos logit contributions | |
anut | +0.138 |
i | +0.133 |
impressed | +0.130 |
bly | +0.127 |
bled | +0.126 |
Neg logit contributions | |
There | -0.162 |
This | -0.160 |
Circ | -0.156 |
ride | -0.155 |
To | -0.153 |
Token | . |
Token ID | 13 |
Feature activation | +3.49e+00 |
Pos logit contributions | |
Circ | +0.019 |
There | +0.019 |
This | +0.019 |
Where | +0.018 |
Across | +0.018 |
Neg logit contributions | |
i | -0.035 |
anut | -0.034 |
impressed | -0.034 |
bled | -0.033 |
bly | -0.033 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +7.38e+00 |
Pos logit contributions | |
Circ | +0.067 |
hike | +0.066 |
ork | +0.066 |
university | +0.065 |
growth | +0.063 |
Neg logit contributions | |
glad | -0.111 |
impressed | -0.105 |
thankful | -0.103 |
grateful | -0.102 |
pleased | -0.100 |
Token | M |
Token ID | 44 |
Feature activation | -1.00e+01 |
Pos logit contributions | |
grade | +0.167 |
university | +0.164 |
dictionary | +0.164 |
ride | +0.160 |
earned | +0.158 |
Neg logit contributions | |
aming | -0.173 |
i | -0.172 |
bled | -0.165 |
gigg | -0.165 |
glad | -0.162 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.07e+01 |
Pos logit contributions | |
anut | +0.297 |
impressed | +0.285 |
bly | +0.281 |
bled | +0.281 |
i | +0.277 |
Neg logit contributions | |
There | -0.245 |
This | -0.240 |
On | -0.229 |
ride | -0.226 |
To | -0.226 |
Token | a |
Token ID | 64 |
Feature activation | -8.55e+00 |
Pos logit contributions | |
irling | +0.543 |
ising | +0.495 |
thirst | +0.485 |
bled | +0.468 |
impressed | +0.462 |
Neg logit contributions | |
to | -0.554 |
ray | -0.493 |
| -0.491 |
ork | -0.471 |
To | -0.464 |
Token | and |
Token ID | 290 |
Feature activation | -8.07e+00 |
Pos logit contributions | |
i | +0.204 |
anut | +0.203 |
impressed | +0.193 |
bled | +0.181 |
gigg | +0.180 |
Neg logit contributions | |
There | -0.244 |
This | -0.241 |
Circ | -0.239 |
Where | -0.235 |
Across | -0.232 |
Token | Ron |
Token ID | 6575 |
Feature activation | -8.23e+00 |
Pos logit contributions | |
anut | +0.220 |
i | +0.219 |
impressed | +0.211 |
years | +0.202 |
bled | +0.201 |
Neg logit contributions | |
There | -0.200 |
This | -0.197 |
Circ | -0.196 |
Across | -0.190 |
To | -0.188 |
Token | ny |
Token ID | 3281 |
Feature activation | -9.98e+00 |
Pos logit contributions | |
anut | +0.243 |
impressed | +0.237 |
i | +0.228 |
irling | +0.228 |
bly | +0.227 |
Neg logit contributions | |
There | -0.196 |
This | -0.193 |
ride | -0.184 |
To | -0.182 |
On | -0.181 |
Token | were |
Token ID | 547 |
Feature activation | -4.12e+00 |
Pos logit contributions | |
anut | +0.267 |
i | +0.264 |
impressed | +0.253 |
bled | +0.244 |
bly | +0.242 |
Neg logit contributions | |
There | -0.255 |
This | -0.251 |
Circ | -0.248 |
Across | -0.240 |
Where | -0.240 |
Token | â |
Token ID | 22940 |
Feature activation | -1.29e+01 |
Pos logit contributions | |
hike | +0.064 |
ork | +0.063 |
Circ | +0.060 |
stairs | +0.058 |
ride | +0.058 |
Neg logit contributions | |
glad | -0.099 |
careful | -0.094 |
impressed | -0.094 |
grateful | -0.093 |
thankful | -0.092 |
Token | € |
Token ID | 26391 |
Feature activation | -1.07e+01 |
Pos logit contributions | |
anut | +0.344 |
i | +0.336 |
bly | +0.316 |
bled | +0.314 |
impressed | +0.313 |
Neg logit contributions | |
There | -0.329 |
Circ | -0.327 |
Where | -0.315 |
university | -0.312 |
Across | -0.310 |
Token |
|
Token ID | 198 |
Feature activation | +2.92e+00 |
Pos logit contributions | |
impressed | +0.308 |
glad | +0.307 |
anut | +0.303 |
careful | +0.302 |
irling | +0.296 |
Neg logit contributions | |
Circ | -0.232 |
growth | -0.226 |
Across | -0.219 |
shortcut | -0.211 |
corner | -0.210 |
Token |
|
Token ID | 198 |
Feature activation | +2.91e+00 |
Pos logit contributions | |
There | +0.057 |
university | +0.055 |
Circ | +0.055 |
Across | +0.055 |
ride | +0.053 |
Neg logit contributions | |
impressed | -0.096 |
anut | -0.093 |
glad | -0.093 |
pleased | -0.090 |
bled | -0.089 |
Token | M |
Token ID | 44 |
Feature activation | -1.00e+01 |
Pos logit contributions | |
There | +0.077 |
This | +0.074 |
Circ | +0.072 |
Where | +0.071 |
ride | +0.071 |
Neg logit contributions | |
anut | -0.078 |
impressed | -0.076 |
bly | -0.073 |
irling | -0.073 |
careful | -0.073 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.07e+01 |
Pos logit contributions | |
anut | +0.173 |
impressed | +0.161 |
i | +0.160 |
bly | +0.152 |
bled | +0.151 |
Neg logit contributions | |
There | -0.363 |
This | -0.358 |
Circ | -0.348 |
To | -0.345 |
Where | -0.345 |
Token | a |
Token ID | 64 |
Feature activation | -8.67e+00 |
Pos logit contributions | |
irling | +0.528 |
anut | +0.470 |
bled | +0.447 |
itching | +0.444 |
ising | +0.437 |
Neg logit contributions | |
to | -0.576 |
To | -0.546 |
ork | -0.532 |
ray | -0.525 |
ride | -0.523 |
Token | said |
Token ID | 531 |
Feature activation | -6.65e+00 |
Pos logit contributions | |
anut | +0.214 |
i | +0.199 |
impressed | +0.195 |
bly | +0.194 |
bled | +0.193 |
Neg logit contributions | |
There | -0.248 |
This | -0.245 |
ride | -0.236 |
To | -0.236 |
Circ | -0.235 |
Token | , |
Token ID | 11 |
Feature activation | -9.11e+00 |
Pos logit contributions | |
anut | +0.152 |
i | +0.150 |
impressed | +0.145 |
bled | +0.140 |
bly | +0.139 |
Neg logit contributions | |
There | -0.197 |
Circ | -0.191 |
This | -0.191 |
Across | -0.188 |
ride | -0.186 |
Token | � |
Token ID | 6184 |
Feature activation | -8.32e+00 |
Pos logit contributions | |
impressed | +0.293 |
glad | +0.288 |
i | +0.283 |
pleased | +0.282 |
bled | +0.280 |
Neg logit contributions | |
Circ | -0.174 |
growth | -0.174 |
hike | -0.164 |
university | -0.158 |
ride | -0.157 |
Token | � |
Token ID | 95 |
Feature activation | -6.37e+00 |
Pos logit contributions | |
anut | +0.251 |
i | +0.241 |
impressed | +0.230 |
bly | +0.229 |
irling | +0.227 |
Neg logit contributions | |
There | -0.187 |
Circ | -0.183 |
Across | -0.177 |
ride | -0.173 |
Where | -0.171 |
Token | they |
Token ID | 484 |
Feature activation | -1.19e-01 |
Pos logit contributions | |
There | +0.007 |
This | +0.006 |
To | +0.006 |
Circ | +0.006 |
ride | +0.006 |
Neg logit contributions | |
anut | -0.008 |
impressed | -0.008 |
years | -0.008 |
i | -0.008 |
irling | -0.007 |
Token | arrived |
Token ID | 5284 |
Feature activation | +3.98e-01 |
Pos logit contributions | |
anut | +0.004 |
i | +0.004 |
impressed | +0.004 |
bled | +0.003 |
bly | +0.003 |
Neg logit contributions | |
There | -0.003 |
This | -0.003 |
Circ | -0.002 |
Where | -0.002 |
Across | -0.002 |
Token | home |
Token ID | 1363 |
Feature activation | +4.15e+00 |
Pos logit contributions | |
to | +0.009 |
There | +0.009 |
This | +0.009 |
To | +0.008 |
On | +0.008 |
Neg logit contributions | |
anut | -0.013 |
years | -0.012 |
impressed | -0.012 |
bly | -0.012 |
irling | -0.012 |
Token | , |
Token ID | 11 |
Feature activation | -9.39e+00 |
Pos logit contributions | |
to | +0.091 |
There | +0.086 |
This | +0.082 |
To | +0.080 |
| +0.079 |
Neg logit contributions | |
anut | -0.145 |
years | -0.133 |
irling | -0.132 |
bly | -0.126 |
ushed | -0.125 |
Token | M |
Token ID | 337 |
Feature activation | -1.20e+01 |
Pos logit contributions | |
anut | +0.286 |
irling | +0.269 |
years | +0.268 |
impressed | +0.262 |
ushed | +0.261 |
Neg logit contributions | |
There | -0.224 |
This | -0.222 |
To | -0.210 |
Where | -0.210 |
ride | -0.207 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.07e+01 |
Pos logit contributions | |
impressed | +0.276 |
irling | +0.256 |
anut | +0.255 |
careful | +0.251 |
bly | +0.250 |
Neg logit contributions | |
There | -0.373 |
This | -0.371 |
On | -0.355 |
| -0.354 |
To | -0.351 |
Token | a |
Token ID | 64 |
Feature activation | -8.78e+00 |
Pos logit contributions | |
irling | +0.582 |
ising | +0.473 |
bled | +0.468 |
anut | +0.468 |
ishment | +0.464 |
Neg logit contributions | |
to | -0.566 |
To | -0.507 |
ray | -0.489 |
ork | -0.483 |
ride | -0.466 |
Token | gave |
Token ID | 2921 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
anut | +0.229 |
bly | +0.216 |
bled | +0.211 |
i | +0.211 |
impressed | +0.210 |
Neg logit contributions | |
There | -0.235 |
This | -0.228 |
ride | -0.225 |
To | -0.225 |
to | -0.220 |
Token | Ron |
Token ID | 6575 |
Feature activation | -8.71e+00 |
Pos logit contributions | |
impressed | +0.089 |
irling | +0.087 |
anut | +0.087 |
pleased | +0.083 |
ushed | +0.082 |
Neg logit contributions | |
This | -0.061 |
There | -0.060 |
Circ | -0.060 |
To | -0.058 |
Where | -0.058 |
Token | ny |
Token ID | 3281 |
Feature activation | -1.02e+01 |
Pos logit contributions | |
impressed | +0.219 |
anut | +0.218 |
irling | +0.211 |
bled | +0.207 |
bly | +0.206 |
Neg logit contributions | |
This | -0.237 |
There | -0.235 |
Circ | -0.227 |
To | -0.226 |
Where | -0.225 |
Token | a |
Token ID | 257 |
Feature activation | +2.36e-01 |
Pos logit contributions | |
anut | +0.300 |
i | +0.293 |
careful | +0.292 |
impressed | +0.280 |
glad | +0.277 |
Neg logit contributions | |
There | -0.238 |
Circ | -0.226 |
Where | -0.225 |
university | -0.223 |
This | -0.222 |
Token | mustache |
Token ID | 49303 |
Feature activation | +2.09e+00 |
Pos logit contributions | |
There | +0.023 |
This | +0.021 |
growth | +0.021 |
On | +0.021 |
Circ | +0.021 |
Neg logit contributions | |
anut | -0.025 |
careful | -0.024 |
i | -0.023 |
impressed | -0.023 |
glad | -0.023 |
Token | !" |
Token ID | 2474 |
Feature activation | +2.62e+00 |
Pos logit contributions | |
There | +0.051 |
Circ | +0.049 |
growth | +0.049 |
Across | +0.048 |
Under | +0.046 |
Neg logit contributions | |
i | -0.061 |
anut | -0.059 |
careful | -0.058 |
akers | -0.054 |
ious | -0.054 |
Token |
|
Token ID | 198 |
Feature activation | +2.91e+00 |
Pos logit contributions | |
There | +0.063 |
This | +0.062 |
Circ | +0.061 |
ride | +0.060 |
Across | +0.059 |
Neg logit contributions | |
anut | -0.074 |
i | -0.072 |
impressed | -0.070 |
bled | -0.068 |
bly | -0.068 |
Token |
|
Token ID | 198 |
Feature activation | +2.92e+00 |
Pos logit contributions | |
There | +0.041 |
university | +0.041 |
ride | +0.040 |
Circ | +0.040 |
Across | +0.039 |
Neg logit contributions | |
impressed | -0.105 |
anut | -0.105 |
i | -0.102 |
careful | -0.101 |
glad | -0.101 |
Token | M |
Token ID | 44 |
Feature activation | -1.01e+01 |
Pos logit contributions | |
There | +0.076 |
This | +0.075 |
Circ | +0.073 |
To | +0.071 |
Across | +0.071 |
Neg logit contributions | |
anut | -0.080 |
i | -0.077 |
impressed | -0.075 |
bled | -0.073 |
bly | -0.073 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.06e+01 |
Pos logit contributions | |
impressed | +0.234 |
anut | +0.232 |
bly | +0.217 |
glad | +0.215 |
i | +0.215 |
Neg logit contributions | |
There | -0.309 |
This | -0.302 |
ork | -0.297 |
On | -0.297 |
To | -0.292 |
Token | a |
Token ID | 64 |
Feature activation | -8.64e+00 |
Pos logit contributions | |
irling | +0.540 |
anut | +0.462 |
glad | +0.451 |
itching | +0.440 |
bled | +0.436 |
Neg logit contributions | |
to | -0.564 |
To | -0.528 |
ork | -0.526 |
ray | -0.507 |
To | -0.499 |
Token | replied |
Token ID | 8712 |
Feature activation | -6.07e+00 |
Pos logit contributions | |
anut | +0.227 |
bly | +0.218 |
irling | +0.208 |
bled | +0.207 |
glad | +0.206 |
Neg logit contributions | |
There | -0.235 |
This | -0.230 |
to | -0.227 |
To | -0.225 |
ride | -0.224 |
Token | , |
Token ID | 11 |
Feature activation | -9.11e+00 |
Pos logit contributions | |
anut | +0.158 |
i | +0.150 |
impressed | +0.147 |
bly | +0.142 |
years | +0.141 |
Neg logit contributions | |
There | -0.168 |
This | -0.166 |
Circ | -0.159 |
To | -0.158 |
Where | -0.157 |
Token | " |
Token ID | 366 |
Feature activation | +2.69e+00 |
Pos logit contributions | |
anut | +0.353 |
impressed | +0.347 |
i | +0.346 |
bled | +0.340 |
bly | +0.336 |
Neg logit contributions | |
There | -0.120 |
Circ | -0.118 |
This | -0.114 |
ride | -0.110 |
university | -0.108 |
Token | Well |
Token ID | 5779 |
Feature activation | -7.13e+00 |
Pos logit contributions | |
There | +0.057 |
This | +0.056 |
Circ | +0.055 |
Where | +0.054 |
To | +0.053 |
Neg logit contributions | |
anut | -0.087 |
i | -0.084 |
impressed | -0.082 |
bled | -0.081 |
bly | -0.080 |
Token | it |
Token ID | 340 |
Feature activation | +4.92e+00 |
Pos logit contributions | |
There | +0.212 |
Circ | +0.203 |
Across | +0.202 |
ives | +0.194 |
Exit | +0.193 |
Neg logit contributions | |
i | -0.204 |
then | -0.189 |
anut | -0.188 |
bly | -0.186 |
gigg | -0.179 |
Token | . |
Token ID | 13 |
Feature activation | +3.81e+00 |
Pos logit contributions | |
Circ | +0.126 |
There | +0.116 |
growth | +0.114 |
Where | +0.112 |
Across | +0.111 |
Neg logit contributions | |
i | -0.139 |
anut | -0.133 |
impressed | -0.131 |
careful | -0.126 |
then | -0.125 |
Token |
|
Token ID | 198 |
Feature activation | +3.18e+00 |
Pos logit contributions | |
shortcut | +0.087 |
hike | +0.086 |
university | +0.085 |
growth | +0.083 |
Circ | +0.083 |
Neg logit contributions | |
anut | -0.102 |
impressed | -0.101 |
glad | -0.101 |
omo | -0.100 |
gigg | -0.100 |
Token |
|
Token ID | 198 |
Feature activation | +3.17e+00 |
Pos logit contributions | |
university | +0.050 |
There | +0.049 |
growth | +0.048 |
ride | +0.047 |
Across | +0.047 |
Neg logit contributions | |
impressed | -0.111 |
anut | -0.109 |
glad | -0.106 |
careful | -0.105 |
i | -0.105 |
Token | M |
Token ID | 44 |
Feature activation | -9.82e+00 |
Pos logit contributions | |
There | +0.081 |
This | +0.078 |
Circ | +0.076 |
university | +0.075 |
Where | +0.075 |
Neg logit contributions | |
anut | -0.087 |
impressed | -0.084 |
i | -0.082 |
careful | -0.081 |
bly | -0.081 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.06e+01 |
Pos logit contributions | |
impressed | +0.221 |
anut | +0.215 |
irling | +0.215 |
bly | +0.213 |
bled | +0.200 |
Neg logit contributions | |
There | -0.310 |
This | -0.303 |
Where | -0.296 |
To | -0.294 |
ork | -0.293 |
Token | a |
Token ID | 64 |
Feature activation | -8.54e+00 |
Pos logit contributions | |
irling | +0.546 |
itching | +0.462 |
ising | +0.459 |
anut | +0.457 |
thirst | +0.457 |
Neg logit contributions | |
to | -0.555 |
To | -0.515 |
ork | -0.489 |
ray | -0.479 |
Circ | -0.478 |
Token | and |
Token ID | 290 |
Feature activation | -8.31e+00 |
Pos logit contributions | |
anut | +0.219 |
i | +0.204 |
bly | +0.203 |
bled | +0.200 |
impressed | +0.200 |
Neg logit contributions | |
There | -0.236 |
This | -0.231 |
Circ | -0.225 |
ride | -0.225 |
To | -0.223 |
Token | Tom |
Token ID | 4186 |
Feature activation | -6.14e+00 |
Pos logit contributions | |
anut | +0.299 |
irling | +0.288 |
impressed | +0.287 |
bly | +0.284 |
i | +0.283 |
Neg logit contributions | |
There | -0.149 |
This | -0.144 |
Circ | -0.135 |
To | -0.134 |
ride | -0.133 |
Token | went |
Token ID | 1816 |
Feature activation | +1.24e+00 |
Pos logit contributions | |
anut | +0.179 |
bly | +0.170 |
irling | +0.167 |
impressed | +0.166 |
bled | +0.166 |
Neg logit contributions | |
There | -0.151 |
ride | -0.149 |
This | -0.143 |
On | -0.140 |
To | -0.139 |
Token | to |
Token ID | 284 |
Feature activation | +6.92e+00 |
Pos logit contributions | |
growth | +0.021 |
There | +0.020 |
Circ | +0.019 |
Where | +0.019 |
This | +0.019 |
Neg logit contributions | |
anut | -0.044 |
careful | -0.043 |
i | -0.043 |
impressed | -0.043 |
glad | -0.042 |
Token | and |
Token ID | 290 |
Feature activation | -1.08e+01 |
Pos logit contributions | |
anut | +0.461 |
impressed | +0.440 |
i | +0.439 |
bled | +0.439 |
bly | +0.435 |
Neg logit contributions | |
There | -0.201 |
This | -0.199 |
ride | -0.183 |
To | -0.182 |
Where | -0.181 |
Token | she |
Token ID | 673 |
Feature activation | -5.32e+00 |
Pos logit contributions | |
anut | +0.298 |
i | +0.295 |
impressed | +0.288 |
bly | +0.275 |
years | +0.275 |
Neg logit contributions | |
There | -0.268 |
This | -0.263 |
Circ | -0.261 |
Across | -0.253 |
To | -0.252 |
Token | was |
Token ID | 373 |
Feature activation | -5.64e+00 |
Pos logit contributions | |
anut | +0.149 |
bled | +0.137 |
i | +0.136 |
bly | +0.135 |
impressed | +0.134 |
Neg logit contributions | |
There | -0.137 |
This | -0.135 |
ride | -0.131 |
To | -0.129 |
Where | -0.129 |
Token | three |
Token ID | 1115 |
Feature activation | -9.35e+00 |
Pos logit contributions | |
anut | +0.134 |
bled | +0.122 |
i | +0.120 |
ushed | +0.119 |
ising | +0.119 |
Neg logit contributions | |
There | -0.171 |
This | -0.170 |
to | -0.166 |
To | -0.164 |
ride | -0.162 |
Token | - |
Token ID | 12 |
Feature activation | -5.07e+00 |
Pos logit contributions | |
bled | +0.178 |
anut | +0.162 |
bly | +0.149 |
ushed | +0.128 |
impressed | +0.125 |
Neg logit contributions | |
to | -0.326 |
This | -0.325 |
There | -0.323 |
ride | -0.321 |
corner | -0.313 |
Token | years |
Token ID | 19002 |
Feature activation | -2.06e+01 |
Pos logit contributions | |
anut | +0.043 |
i | +0.036 |
bled | +0.035 |
impressed | +0.035 |
maid | +0.034 |
Neg logit contributions | |
Circ | -0.218 |
This | -0.215 |
There | -0.215 |
to | -0.213 |
To | -0.212 |
Token | - |
Token ID | 12 |
Feature activation | -4.37e+00 |
Pos logit contributions | |
anut | +0.496 |
i | +0.469 |
impressed | +0.464 |
bled | +0.463 |
bly | +0.459 |
Neg logit contributions | |
This | -0.583 |
There | -0.583 |
Circ | -0.568 |
ride | -0.560 |
To | -0.553 |
Token | old |
Token ID | 727 |
Feature activation | -6.85e+00 |
Pos logit contributions | |
anut | +0.130 |
i | +0.125 |
impressed | +0.123 |
bled | +0.121 |
bly | +0.120 |
Neg logit contributions | |
There | -0.101 |
This | -0.100 |
Circ | -0.097 |
To | -0.095 |
ride | -0.095 |
Token | . |
Token ID | 13 |
Feature activation | +3.47e+00 |
Pos logit contributions | |
anut | +0.240 |
i | +0.233 |
impressed | +0.230 |
bled | +0.225 |
bly | +0.225 |
Neg logit contributions | |
There | -0.122 |
This | -0.120 |
Circ | -0.114 |
Where | -0.111 |
To | -0.111 |
Token | She |
Token ID | 1375 |
Feature activation | -5.34e+00 |
Pos logit contributions | |
Circ | +0.070 |
ork | +0.070 |
shortcut | +0.067 |
hike | +0.065 |
ride | +0.065 |
Neg logit contributions | |
glad | -0.105 |
impressed | -0.103 |
years | -0.100 |
i | -0.099 |
pleased | -0.097 |
Token | wanted |
Token ID | 2227 |
Feature activation | -1.50e+00 |
Pos logit contributions | |
anut | +0.154 |
bly | +0.143 |
bled | +0.142 |
i | +0.141 |
impressed | +0.139 |
Neg logit contributions | |
There | -0.132 |
This | -0.129 |
ride | -0.129 |
To | -0.126 |
On | -0.125 |
Token | . |
Token ID | 13 |
Feature activation | +3.72e+00 |
Pos logit contributions | |
careful | +0.003 |
i | +0.003 |
anut | +0.003 |
impressed | +0.003 |
glad | +0.003 |
Neg logit contributions | |
Circ | -0.003 |
There | -0.003 |
Across | -0.003 |
growth | -0.003 |
On | -0.003 |
Token | |
Token ID | 220 |
Feature activation | -2.88e+00 |
Pos logit contributions | |
ride | +0.072 |
Circ | +0.071 |
university | +0.070 |
shortcut | +0.069 |
Across | +0.068 |
Neg logit contributions | |
anut | -0.118 |
i | -0.118 |
glad | -0.112 |
impressed | -0.112 |
years | -0.111 |
Token |
|
Token ID | 198 |
Feature activation | +2.77e+00 |
Pos logit contributions | |
anut | +0.100 |
glad | +0.096 |
impressed | +0.095 |
bled | +0.093 |
i | +0.093 |
Neg logit contributions | |
ride | -0.049 |
hike | -0.048 |
Circ | -0.048 |
university | -0.047 |
growth | -0.046 |
Token |
|
Token ID | 198 |
Feature activation | +2.77e+00 |
Pos logit contributions | |
There | +0.038 |
Across | +0.033 |
This | +0.033 |
university | +0.033 |
ride | +0.033 |
Neg logit contributions | |
anut | -0.108 |
impressed | -0.107 |
i | -0.103 |
glad | -0.102 |
irling | -0.102 |
Token | M |
Token ID | 44 |
Feature activation | -1.01e+01 |
Pos logit contributions | |
This | +0.062 |
To | +0.062 |
Circ | +0.062 |
There | +0.060 |
On | +0.060 |
Neg logit contributions | |
anut | -0.086 |
i | -0.084 |
bled | -0.079 |
impressed | -0.078 |
maid | -0.077 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.06e+01 |
Pos logit contributions | |
impressed | +0.265 |
irling | +0.255 |
bly | +0.251 |
anut | +0.243 |
bled | +0.241 |
Neg logit contributions | |
ork | -0.279 |
There | -0.278 |
On | -0.273 |
This | -0.272 |
| -0.271 |
Token | a |
Token ID | 64 |
Feature activation | -8.57e+00 |
Pos logit contributions | |
irling | +0.552 |
itching | +0.485 |
thirst | +0.471 |
ishment | +0.468 |
glad | +0.467 |
Neg logit contributions | |
to | -0.532 |
To | -0.506 |
ork | -0.476 |
ray | -0.467 |
Circ | -0.459 |
Token | taught |
Token ID | 7817 |
Feature activation | -2.91e+00 |
Pos logit contributions | |
anut | +0.242 |
bly | +0.238 |
years | +0.229 |
bled | +0.225 |
irling | +0.224 |
Neg logit contributions | |
to | -0.207 |
ride | -0.206 |
There | -0.200 |
This | -0.197 |
To | -0.196 |
Token | them |
Token ID | 606 |
Feature activation | +1.51e+00 |
Pos logit contributions | |
anut | +0.092 |
impressed | +0.091 |
irling | +0.090 |
ushed | +0.090 |
bly | +0.090 |
Neg logit contributions | |
This | -0.064 |
There | -0.061 |
To | -0.061 |
Circ | -0.060 |
to | -0.060 |
Token | a |
Token ID | 257 |
Feature activation | +3.16e-01 |
Pos logit contributions | |
There | +0.035 |
Circ | +0.034 |
Across | +0.033 |
This | +0.033 |
Where | +0.033 |
Neg logit contributions | |
anut | -0.043 |
impressed | -0.043 |
i | -0.043 |
careful | -0.042 |
glad | -0.041 |
Token | special |
Token ID | 2041 |
Feature activation | -4.52e+00 |
Pos logit contributions | |
There | +0.009 |
Across | +0.008 |
growth | +0.008 |
This | +0.008 |
Circ | +0.008 |
Neg logit contributions | |
impressed | -0.008 |
careful | -0.008 |
pleased | -0.007 |
anut | -0.007 |
glad | -0.007 |
Token | to |
Token ID | 284 |
Feature activation | +6.65e+00 |
Pos logit contributions | |
careful | +0.314 |
i | +0.305 |
impressed | +0.301 |
gigg | +0.298 |
ies | +0.297 |
Neg logit contributions | |
growth | -0.133 |
There | -0.125 |
On | -0.119 |
Circ | -0.115 |
This | -0.115 |
Token | get |
Token ID | 651 |
Feature activation | +1.42e+00 |
Pos logit contributions | |
There | +0.143 |
This | +0.133 |
Across | +0.133 |
Circ | +0.132 |
To | +0.127 |
Neg logit contributions | |
i | -0.218 |
anut | -0.217 |
impressed | -0.205 |
careful | -0.204 |
gigg | -0.202 |
Token | too |
Token ID | 1165 |
Feature activation | -4.60e+00 |
Pos logit contributions | |
There | +0.035 |
This | +0.034 |
Circ | +0.033 |
On | +0.033 |
Across | +0.033 |
Neg logit contributions | |
anut | -0.041 |
i | -0.040 |
impressed | -0.039 |
careful | -0.038 |
bly | -0.038 |
Token | wild |
Token ID | 4295 |
Feature activation | -5.37e+00 |
Pos logit contributions | |
careful | +0.100 |
anut | +0.094 |
i | +0.093 |
impressed | +0.090 |
glad | +0.088 |
Neg logit contributions | |
There | -0.151 |
This | -0.148 |
Where | -0.148 |
To | -0.146 |
Circ | -0.145 |
Token | . |
Token ID | 13 |
Feature activation | +3.31e+00 |
Pos logit contributions | |
i | +0.180 |
gigg | +0.166 |
ising | +0.164 |
anut | +0.162 |
careful | +0.162 |
Neg logit contributions | |
There | -0.108 |
Across | -0.104 |
Circ | -0.104 |
This | -0.100 |
On | -0.099 |
Token | Id |
Token ID | 5121 |
Feature activation | -2.06e+01 |
Pos logit contributions | |
Circ | +0.061 |
hike | +0.058 |
ride | +0.058 |
growth | +0.058 |
There | +0.056 |
Neg logit contributions | |
i | -0.106 |
anut | -0.105 |
impressed | -0.105 |
glad | -0.105 |
bled | -0.103 |
Token | a |
Token ID | 64 |
Feature activation | -9.08e+00 |
Pos logit contributions | |
anut | +0.391 |
bly | +0.364 |
glad | +0.362 |
impressed | +0.358 |
bled | +0.343 |
Neg logit contributions | |
ork | -0.655 |
ride | -0.637 |
to | -0.630 |
This | -0.630 |
To | -0.616 |
Token | was |
Token ID | 373 |
Feature activation | -3.96e+00 |
Pos logit contributions | |
anut | +0.268 |
bly | +0.251 |
bled | +0.244 |
impressed | +0.239 |
glad | +0.239 |
Neg logit contributions | |
ride | -0.215 |
to | -0.211 |
There | -0.206 |
This | -0.197 |
To | -0.196 |
Token | so |
Token ID | 523 |
Feature activation | -1.07e+01 |
Pos logit contributions | |
anut | +0.079 |
i | +0.075 |
ushed | +0.073 |
maid | +0.070 |
ising | +0.067 |
Neg logit contributions | |
There | -0.130 |
to | -0.129 |
ride | -0.128 |
This | -0.127 |
Across | -0.124 |
Token | happy |
Token ID | 3772 |
Feature activation | -1.36e+01 |
Pos logit contributions | |
anut | +0.215 |
i | +0.206 |
maid | +0.204 |
bled | +0.192 |
ushed | +0.182 |
Neg logit contributions | |
to | -0.360 |
There | -0.360 |
This | -0.357 |
ride | -0.353 |
Across | -0.335 |
Token | she |
Token ID | 673 |
Feature activation | -3.82e+00 |
Pos logit contributions | |
i | +0.364 |
impressed | +0.351 |
gigg | +0.342 |
careful | +0.336 |
glad | +0.330 |
Neg logit contributions | |
Circ | -0.370 |
There | -0.355 |
Across | -0.353 |
growth | -0.351 |
This | -0.339 |
Token | , |
Token ID | 11 |
Feature activation | -9.31e+00 |
Pos logit contributions | |
i | +0.131 |
anut | +0.131 |
impressed | +0.128 |
bly | +0.122 |
careful | +0.122 |
Neg logit contributions | |
There | -0.127 |
This | -0.123 |
Circ | -0.122 |
Across | -0.118 |
To | -0.118 |
Token | honey |
Token ID | 12498 |
Feature activation | +5.15e-01 |
Pos logit contributions | |
anut | +0.272 |
years | +0.265 |
bled | +0.264 |
irling | +0.261 |
ushed | +0.259 |
Neg logit contributions | |
There | -0.225 |
This | -0.217 |
Where | -0.213 |
ride | -0.210 |
Circ | -0.206 |
Token | ?" |
Token ID | 1701 |
Feature activation | +5.33e+00 |
Pos logit contributions | |
Circ | +0.011 |
There | +0.011 |
This | +0.011 |
To | +0.010 |
Across | +0.010 |
Neg logit contributions | |
i | -0.016 |
anut | -0.016 |
impressed | -0.015 |
bly | -0.015 |
bled | -0.015 |
Token | asked |
Token ID | 1965 |
Feature activation | -2.05e+00 |
Pos logit contributions | |
There | +0.124 |
This | +0.121 |
Circ | +0.117 |
Where | +0.114 |
To | +0.114 |
Neg logit contributions | |
anut | -0.160 |
i | -0.154 |
impressed | -0.152 |
bled | -0.148 |
bly | -0.148 |
Token | M |
Token ID | 337 |
Feature activation | -1.13e+01 |
Pos logit contributions | |
anut | +0.055 |
i | +0.054 |
impressed | +0.052 |
bled | +0.051 |
bly | +0.051 |
Neg logit contributions | |
There | -0.053 |
This | -0.052 |
Circ | -0.051 |
Across | -0.050 |
Where | -0.050 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.05e+01 |
Pos logit contributions | |
anut | +0.184 |
i | +0.172 |
impressed | +0.168 |
bled | +0.159 |
bly | +0.158 |
Neg logit contributions | |
There | -0.416 |
This | -0.411 |
Circ | -0.403 |
To | -0.397 |
Where | -0.397 |
Token | a |
Token ID | 64 |
Feature activation | -8.52e+00 |
Pos logit contributions | |
irling | +0.423 |
bled | +0.360 |
bly | +0.358 |
impressed | +0.357 |
anut | +0.355 |
Neg logit contributions | |
to | -0.659 |
There | -0.619 |
ork | -0.597 |
ride | -0.595 |
To | -0.595 |
Token | . |
Token ID | 13 |
Feature activation | +4.41e+00 |
Pos logit contributions | |
anut | +0.330 |
impressed | +0.316 |
i | +0.313 |
bled | +0.313 |
bly | +0.313 |
Neg logit contributions | |
There | -0.121 |
This | -0.116 |
To | -0.106 |
Across | -0.104 |
Where | -0.104 |
Token |
|
Token ID | 198 |
Feature activation | +3.35e+00 |
Pos logit contributions | |
Circ | +0.059 |
ride | +0.058 |
There | +0.056 |
This | +0.055 |
university | +0.054 |
Neg logit contributions | |
anut | -0.168 |
i | -0.167 |
impressed | -0.165 |
glad | -0.160 |
careful | -0.158 |
Token |
|
Token ID | 198 |
Feature activation | +3.57e+00 |
Pos logit contributions | |
There | +0.052 |
ride | +0.051 |
Across | +0.049 |
university | +0.048 |
Circ | +0.048 |
Neg logit contributions | |
anut | -0.121 |
i | -0.118 |
impressed | -0.117 |
careful | -0.114 |
irling | -0.114 |
Token | Ron |
Token ID | 23672 |
Feature activation | -1.32e+01 |
Pos logit contributions | |
There | +0.086 |
This | +0.083 |
Circ | +0.081 |
ride | +0.080 |
Across | +0.079 |
Neg logit contributions | |
anut | -0.104 |
i | -0.101 |
impressed | -0.099 |
bly | -0.097 |
irling | -0.095 |
Token | . |
Token ID | 13 |
Feature activation | +3.74e+00 |
Pos logit contributions | |
anut | +0.023 |
i | +0.022 |
impressed | +0.022 |
bled | +0.021 |
bly | +0.021 |
Neg logit contributions | |
There | -0.009 |
This | -0.009 |
Circ | -0.009 |
To | -0.008 |
Where | -0.008 |
Token | |
Token ID | 220 |
Feature activation | -2.80e+00 |
Pos logit contributions | |
There | +0.067 |
Circ | +0.067 |
ride | +0.065 |
university | +0.065 |
This | +0.064 |
Neg logit contributions | |
anut | -0.127 |
i | -0.124 |
impressed | -0.122 |
glad | -0.119 |
bled | -0.118 |
Token |
|
Token ID | 198 |
Feature activation | +2.95e+00 |
Pos logit contributions | |
anut | +0.096 |
glad | +0.094 |
impressed | +0.093 |
bled | +0.093 |
careful | +0.091 |
Neg logit contributions | |
Circ | -0.049 |
ride | -0.049 |
hike | -0.048 |
university | -0.048 |
growth | -0.047 |
Token |
|
Token ID | 198 |
Feature activation | +3.00e+00 |
Pos logit contributions | |
There | +0.026 |
This | +0.025 |
Circ | +0.022 |
To | +0.021 |
Where | +0.021 |
Neg logit contributions | |
anut | -0.131 |
i | -0.129 |
impressed | -0.126 |
bled | -0.125 |
bly | -0.124 |
Token | M |
Token ID | 44 |
Feature activation | -9.88e+00 |
Pos logit contributions | |
To | +0.066 |
Circ | +0.063 |
This | +0.062 |
To | +0.062 |
There | +0.061 |
Neg logit contributions | |
i | -0.093 |
anut | -0.093 |
maid | -0.086 |
impressed | -0.086 |
years | -0.086 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.04e+01 |
Pos logit contributions | |
impressed | +0.248 |
irling | +0.241 |
anut | +0.236 |
bly | +0.228 |
bled | +0.222 |
Neg logit contributions | |
ork | -0.278 |
There | -0.278 |
| -0.275 |
This | -0.274 |
On | -0.268 |
Token | a |
Token ID | 64 |
Feature activation | -8.36e+00 |
Pos logit contributions | |
irling | +0.581 |
ishment | +0.509 |
itching | +0.507 |
anut | +0.487 |
ising | +0.479 |
Neg logit contributions | |
to | -0.501 |
To | -0.455 |
of | -0.435 |
ray | -0.427 |
ork | -0.420 |
Token | and |
Token ID | 290 |
Feature activation | -8.27e+00 |
Pos logit contributions | |
anut | +0.217 |
bly | +0.208 |
years | +0.207 |
bled | +0.203 |
irling | +0.197 |
Neg logit contributions | |
to | -0.219 |
ride | -0.217 |
There | -0.214 |
This | -0.214 |
To | -0.211 |
Token | Billy |
Token ID | 15890 |
Feature activation | -5.50e+00 |
Pos logit contributions | |
irling | +0.285 |
anut | +0.275 |
bly | +0.269 |
impressed | +0.269 |
ushed | +0.261 |
Neg logit contributions | |
This | -0.166 |
There | -0.164 |
To | -0.159 |
ride | -0.157 |
to | -0.153 |
Token | stayed |
Token ID | 9658 |
Feature activation | -2.60e+00 |
Pos logit contributions | |
anut | +0.153 |
bled | +0.152 |
bly | +0.151 |
impressed | +0.149 |
irling | +0.148 |
Neg logit contributions | |
ride | -0.138 |
visit | -0.126 |
There | -0.123 |
hike | -0.122 |
to | -0.121 |
Token | by |
Token ID | 416 |
Feature activation | +3.34e+00 |
Pos logit contributions | |
anut | +0.094 |
years | +0.092 |
ushed | +0.090 |
i | +0.089 |
mm | +0.087 |
Neg logit contributions | |
to | -0.047 |
visit | -0.042 |
ride | -0.041 |
corner | -0.041 |
There | -0.040 |
Token | yawn |
Token ID | 40908 |
Feature activation | -7.67e+00 |
Pos logit contributions | |
anut | +0.081 |
i | +0.078 |
impressed | +0.076 |
bled | +0.075 |
bly | +0.074 |
Neg logit contributions | |
There | -0.057 |
This | -0.056 |
Circ | -0.054 |
ride | -0.053 |
To | -0.052 |
Token | . |
Token ID | 13 |
Feature activation | +4.37e+00 |
Pos logit contributions | |
anut | +0.254 |
i | +0.254 |
impressed | +0.247 |
bly | +0.244 |
bled | +0.242 |
Neg logit contributions | |
There | -0.147 |
This | -0.142 |
Circ | -0.142 |
Across | -0.136 |
Where | -0.135 |
Token |
|
Token ID | 198 |
Feature activation | +3.29e+00 |
Pos logit contributions | |
hike | +0.086 |
Circ | +0.082 |
university | +0.082 |
ride | +0.081 |
ork | +0.080 |
Neg logit contributions | |
glad | -0.137 |
impressed | -0.133 |
grateful | -0.130 |
pleased | -0.129 |
bled | -0.129 |
Token |
|
Token ID | 198 |
Feature activation | +3.29e+00 |
Pos logit contributions | |
There | +0.061 |
university | +0.061 |
Circ | +0.059 |
Across | +0.057 |
library | +0.057 |
Neg logit contributions | |
impressed | -0.107 |
glad | -0.106 |
anut | -0.105 |
bled | -0.104 |
pleased | -0.102 |
Token | M |
Token ID | 44 |
Feature activation | -9.78e+00 |
Pos logit contributions | |
There | +0.095 |
This | +0.092 |
Where | +0.088 |
Circ | +0.088 |
university | +0.087 |
Neg logit contributions | |
anut | -0.080 |
impressed | -0.080 |
bly | -0.079 |
careful | -0.077 |
bled | -0.077 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.04e+01 |
Pos logit contributions | |
anut | +0.204 |
impressed | +0.197 |
bly | +0.184 |
i | +0.184 |
irling | +0.181 |
Neg logit contributions | |
There | -0.323 |
This | -0.317 |
ork | -0.308 |
On | -0.307 |
To | -0.307 |
Token | a |
Token ID | 64 |
Feature activation | -8.23e+00 |
Pos logit contributions | |
irling | +0.556 |
anut | +0.508 |
itching | +0.497 |
ishment | +0.475 |
glad | +0.471 |
Neg logit contributions | |
to | -0.514 |
To | -0.488 |
ride | -0.449 |
ork | -0.445 |
ray | -0.441 |
Token | smiled |
Token ID | 13541 |
Feature activation | -1.06e+01 |
Pos logit contributions | |
anut | +0.192 |
bly | +0.175 |
years | +0.173 |
bled | +0.171 |
impressed | +0.171 |
Neg logit contributions | |
ride | -0.245 |
There | -0.243 |
to | -0.237 |
This | -0.237 |
To | -0.237 |
Token | and |
Token ID | 290 |
Feature activation | -8.15e+00 |
Pos logit contributions | |
anut | +0.376 |
irling | +0.342 |
years | +0.339 |
akers | +0.334 |
rim | +0.330 |
Neg logit contributions | |
to | -0.222 |
There | -0.211 |
This | -0.201 |
To | -0.195 |
ride | -0.192 |
Token | said |
Token ID | 531 |
Feature activation | -6.27e+00 |
Pos logit contributions | |
anut | +0.207 |
years | +0.194 |
akers | +0.189 |
ising | +0.188 |
irling | +0.186 |
Neg logit contributions | |
ride | -0.237 |
to | -0.230 |
To | -0.225 |
There | -0.225 |
This | -0.216 |
Token | , |
Token ID | 11 |
Feature activation | -8.90e+00 |
Pos logit contributions | |
anut | +0.158 |
i | +0.149 |
impressed | +0.146 |
years | +0.140 |
bly | +0.140 |
Neg logit contributions | |
There | -0.178 |
This | -0.177 |
Circ | -0.169 |
To | -0.168 |
Where | -0.168 |
Token | up |
Token ID | 510 |
Feature activation | +2.18e+00 |
Pos logit contributions | |
anut | +0.162 |
i | +0.154 |
bly | +0.149 |
bled | +0.149 |
irling | +0.146 |
Neg logit contributions | |
There | -0.158 |
This | -0.154 |
To | -0.149 |
Across | -0.149 |
university | -0.148 |
Token | and |
Token ID | 290 |
Feature activation | -7.91e+00 |
Pos logit contributions | |
There | +0.052 |
This | +0.051 |
Circ | +0.050 |
Where | +0.049 |
Across | +0.049 |
Neg logit contributions | |
anut | -0.063 |
i | -0.061 |
impressed | -0.060 |
bled | -0.058 |
bly | -0.058 |
Token | said |
Token ID | 531 |
Feature activation | -6.18e+00 |
Pos logit contributions | |
anut | +0.261 |
i | +0.249 |
bly | +0.247 |
bled | +0.243 |
irling | +0.242 |
Neg logit contributions | |
There | -0.161 |
This | -0.154 |
ride | -0.149 |
To | -0.149 |
Where | -0.147 |
Token | " |
Token ID | 366 |
Feature activation | +3.28e+00 |
Pos logit contributions | |
anut | +0.153 |
i | +0.147 |
impressed | +0.144 |
bled | +0.139 |
bly | +0.139 |
Neg logit contributions | |
There | -0.175 |
This | -0.173 |
Circ | -0.168 |
To | -0.165 |
Where | -0.165 |
Token | M |
Token ID | 44 |
Feature activation | -9.71e+00 |
Pos logit contributions | |
There | +0.071 |
university | +0.067 |
This | +0.066 |
To | +0.064 |
University | +0.063 |
Neg logit contributions | |
anut | -0.102 |
i | -0.102 |
years | -0.098 |
bly | -0.096 |
impressed | -0.095 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.03e+01 |
Pos logit contributions | |
anut | +0.197 |
impressed | +0.187 |
i | +0.184 |
bled | +0.176 |
bly | +0.174 |
Neg logit contributions | |
There | -0.324 |
This | -0.319 |
Circ | -0.307 |
Across | -0.306 |
On | -0.306 |
Token | a |
Token ID | 64 |
Feature activation | -8.23e+00 |
Pos logit contributions | |
anut | +0.349 |
impressed | +0.326 |
i | +0.323 |
irling | +0.318 |
bled | +0.316 |
Neg logit contributions | |
There | -0.723 |
This | -0.720 |
Circ | -0.696 |
Where | -0.694 |
ride | -0.692 |
Token | , |
Token ID | 11 |
Feature activation | -8.65e+00 |
Pos logit contributions | |
anut | +0.190 |
i | +0.189 |
impressed | +0.177 |
careful | +0.171 |
bly | +0.167 |
Neg logit contributions | |
There | -0.243 |
Circ | -0.240 |
Across | -0.232 |
This | -0.230 |
On | -0.230 |
Token | I |
Token ID | 314 |
Feature activation | +3.49e+00 |
Pos logit contributions | |
i | +0.241 |
anut | +0.237 |
careful | +0.236 |
impressed | +0.233 |
glad | +0.222 |
Neg logit contributions | |
Circ | -0.207 |
Across | -0.199 |
growth | -0.196 |
There | -0.196 |
On | -0.195 |
Token | need |
Token ID | 761 |
Feature activation | +2.94e+00 |
Pos logit contributions | |
There | +0.072 |
This | +0.070 |
ride | +0.069 |
Where | +0.067 |
Circ | +0.066 |
Neg logit contributions | |
anut | -0.114 |
bled | -0.110 |
i | -0.109 |
impressed | -0.109 |
bly | -0.108 |
Token | a |
Token ID | 257 |
Feature activation | +8.69e-01 |
Pos logit contributions | |
Circ | +0.068 |
There | +0.068 |
Across | +0.067 |
On | +0.064 |
hike | +0.063 |
Neg logit contributions | |
careful | -0.083 |
i | -0.082 |
anut | -0.080 |
then | -0.079 |
years | -0.078 |
Token | the |
Token ID | 262 |
Feature activation | +4.52e+00 |
Pos logit contributions | |
There | +0.105 |
This | +0.101 |
Circ | +0.099 |
Across | +0.097 |
Where | +0.096 |
Neg logit contributions | |
anut | -0.184 |
i | -0.180 |
impressed | -0.176 |
bly | -0.172 |
bled | -0.171 |
Token | cliff |
Token ID | 19516 |
Feature activation | +8.72e+00 |
Pos logit contributions | |
There | +0.085 |
This | +0.083 |
To | +0.079 |
Circ | +0.078 |
On | +0.077 |
Neg logit contributions | |
anut | -0.156 |
i | -0.154 |
impressed | -0.153 |
ising | -0.146 |
glad | -0.146 |
Token | ! |
Token ID | 0 |
Feature activation | +3.36e+00 |
Pos logit contributions | |
There | +0.233 |
This | +0.227 |
Circ | +0.224 |
Where | +0.219 |
Across | +0.218 |
Neg logit contributions | |
anut | -0.228 |
i | -0.221 |
impressed | -0.217 |
bly | -0.208 |
bled | -0.206 |
Token |
|
Token ID | 198 |
Feature activation | +3.06e+00 |
Pos logit contributions | |
ride | +0.063 |
Circ | +0.061 |
university | +0.061 |
hike | +0.060 |
There | +0.059 |
Neg logit contributions | |
anut | -0.107 |
i | -0.106 |
impressed | -0.106 |
glad | -0.104 |
careful | -0.102 |
Token | M |
Token ID | 44 |
Feature activation | -9.68e+00 |
Pos logit contributions | |
There | +0.045 |
This | +0.042 |
Circ | +0.042 |
university | +0.041 |
Across | +0.041 |
Neg logit contributions | |
anut | -0.117 |
impressed | -0.114 |
i | -0.113 |
glad | -0.111 |
careful | -0.110 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.03e+01 |
Pos logit contributions | |
impressed | +0.239 |
anut | +0.233 |
irling | +0.230 |
bly | +0.221 |
careful | +0.210 |
Neg logit contributions | |
There | -0.284 |
ork | -0.281 |
This | -0.275 |
Where | -0.268 |
On | -0.266 |
Token | a |
Token ID | 64 |
Feature activation | -8.36e+00 |
Pos logit contributions | |
irling | +0.554 |
anut | +0.469 |
thirst | +0.468 |
bled | +0.468 |
mm | +0.467 |
Neg logit contributions | |
to | -0.527 |
To | -0.479 |
ray | -0.461 |
ork | -0.447 |
Circ | -0.445 |
Token | : |
Token ID | 25 |
Feature activation | -4.48e+00 |
Pos logit contributions | |
anut | +0.225 |
bly | +0.205 |
bled | +0.205 |
impressed | +0.205 |
i | +0.204 |
Neg logit contributions | |
There | -0.223 |
This | -0.217 |
To | -0.210 |
ride | -0.209 |
Circ | -0.208 |
Token | That |
Token ID | 1320 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
impressed | +0.156 |
glad | +0.151 |
pleased | +0.149 |
i | +0.148 |
gigg | +0.147 |
Neg logit contributions | |
Circ | -0.078 |
There | -0.073 |
ork | -0.073 |
growth | -0.072 |
hike | -0.072 |
Token | â |
Token ID | 22940 |
Feature activation | -1.27e+01 |
Pos logit contributions | |
anut | +0.044 |
i | +0.042 |
impressed | +0.042 |
bly | +0.040 |
bled | +0.040 |
Neg logit contributions | |
There | -0.032 |
This | -0.031 |
Circ | -0.030 |
To | -0.030 |
Across | -0.030 |
Token | € |
Token ID | 26391 |
Feature activation | -1.04e+01 |
Pos logit contributions | |
anut | +0.351 |
i | +0.343 |
bly | +0.318 |
bled | +0.315 |
impressed | +0.314 |
Neg logit contributions | |
There | -0.321 |
Circ | -0.313 |
This | -0.306 |
Where | -0.302 |
university | -0.301 |
Token | . |
Token ID | 13 |
Feature activation | +1.22e+00 |
Pos logit contributions | |
anut | +0.236 |
i | +0.222 |
impressed | +0.220 |
bled | +0.216 |
bly | +0.215 |
Neg logit contributions | |
There | -0.246 |
This | -0.241 |
To | -0.232 |
Circ | -0.230 |
ride | -0.230 |
Token | She |
Token ID | 1375 |
Feature activation | -7.76e+00 |
Pos logit contributions | |
Circ | +0.026 |
ork | +0.024 |
hike | +0.024 |
ride | +0.023 |
shortcut | +0.023 |
Neg logit contributions | |
glad | -0.037 |
impressed | -0.037 |
i | -0.036 |
anut | -0.036 |
years | -0.035 |
Token | was |
Token ID | 373 |
Feature activation | -5.43e+00 |
Pos logit contributions | |
anut | +0.195 |
i | +0.186 |
impressed | +0.182 |
bled | +0.178 |
bly | +0.177 |
Neg logit contributions | |
There | -0.217 |
This | -0.213 |
Circ | -0.207 |
To | -0.204 |
ride | -0.204 |
Token | three |
Token ID | 1115 |
Feature activation | -9.42e+00 |
Pos logit contributions | |
anut | +0.126 |
i | +0.121 |
ushed | +0.119 |
ising | +0.117 |
maid | +0.112 |
Neg logit contributions | |
There | -0.162 |
to | -0.159 |
This | -0.156 |
To | -0.155 |
university | -0.154 |
Token | - |
Token ID | 12 |
Feature activation | -4.69e+00 |
Pos logit contributions | |
bled | +0.215 |
anut | +0.192 |
bly | +0.180 |
ushed | +0.157 |
maid | +0.154 |
Neg logit contributions | |
to | -0.300 |
corner | -0.294 |
There | -0.287 |
stairs | -0.282 |
university | -0.280 |
Token | years |
Token ID | 19002 |
Feature activation | -2.03e+01 |
Pos logit contributions | |
anut | +0.048 |
maid | +0.044 |
i | +0.043 |
bled | +0.042 |
ushed | +0.040 |
Neg logit contributions | |
Circ | -0.191 |
to | -0.188 |
This | -0.188 |
There | -0.187 |
To | -0.187 |
Token | - |
Token ID | 12 |
Feature activation | -3.87e+00 |
Pos logit contributions | |
anut | +0.509 |
bled | +0.495 |
bly | +0.491 |
i | +0.481 |
impressed | +0.478 |
Neg logit contributions | |
This | -0.546 |
There | -0.531 |
Circ | -0.528 |
ride | -0.524 |
to | -0.515 |
Token | old |
Token ID | 727 |
Feature activation | -6.58e+00 |
Pos logit contributions | |
anut | +0.116 |
i | +0.112 |
impressed | +0.110 |
bled | +0.109 |
bly | +0.109 |
Neg logit contributions | |
This | -0.088 |
There | -0.087 |
Circ | -0.085 |
To | -0.083 |
ride | -0.083 |
Token | and |
Token ID | 290 |
Feature activation | -8.26e+00 |
Pos logit contributions | |
anut | +0.207 |
impressed | +0.197 |
bled | +0.196 |
i | +0.195 |
bly | +0.195 |
Neg logit contributions | |
There | -0.142 |
This | -0.141 |
Where | -0.132 |
To | -0.132 |
Circ | -0.132 |
Token | she |
Token ID | 673 |
Feature activation | -2.79e+00 |
Pos logit contributions | |
anut | +0.228 |
i | +0.211 |
maid | +0.205 |
bly | +0.198 |
bled | +0.197 |
Neg logit contributions | |
This | -0.223 |
There | -0.214 |
to | -0.214 |
To | -0.213 |
ride | -0.212 |
Token | loved |
Token ID | 6151 |
Feature activation | -6.04e+00 |
Pos logit contributions | |
anut | +0.083 |
i | +0.077 |
bled | +0.076 |
bly | +0.076 |
irling | +0.074 |
Neg logit contributions | |
There | -0.066 |
This | -0.065 |
ride | -0.064 |
To | -0.063 |
Where | -0.062 |
Token | igator |
Token ID | 23823 |
Feature activation | +9.47e-01 |
Pos logit contributions | |
There | +0.052 |
This | +0.049 |
Circ | +0.049 |
Where | +0.048 |
Across | +0.047 |
Neg logit contributions | |
anut | -0.079 |
i | -0.077 |
impressed | -0.077 |
bly | -0.074 |
careful | -0.074 |
Token | toy |
Token ID | 13373 |
Feature activation | +1.70e+00 |
Pos logit contributions | |
There | +0.019 |
This | +0.018 |
Circ | +0.017 |
Where | +0.017 |
Across | +0.017 |
Neg logit contributions | |
anut | -0.032 |
i | -0.031 |
impressed | -0.030 |
bly | -0.029 |
careful | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | +4.38e+00 |
Pos logit contributions | |
There | +0.032 |
This | +0.031 |
Circ | +0.030 |
ride | +0.029 |
to | +0.029 |
Neg logit contributions | |
anut | -0.058 |
impressed | -0.056 |
i | -0.056 |
irling | -0.055 |
bled | -0.054 |
Token | But |
Token ID | 887 |
Feature activation | -5.04e+00 |
Pos logit contributions | |
Circ | +0.075 |
ride | +0.074 |
There | +0.072 |
Across | +0.070 |
university | +0.070 |
Neg logit contributions | |
anut | -0.149 |
i | -0.147 |
impressed | -0.144 |
years | -0.142 |
glad | -0.141 |
Token | M |
Token ID | 337 |
Feature activation | -1.12e+01 |
Pos logit contributions | |
anut | +0.189 |
ushed | +0.181 |
irling | +0.180 |
aming | +0.175 |
bly | +0.175 |
Neg logit contributions | |
to | -0.099 |
There | -0.094 |
This | -0.092 |
To | -0.089 |
visit | -0.085 |
Token | omm |
Token ID | 2002 |
Feature activation | -2.02e+01 |
Pos logit contributions | |
impressed | +0.294 |
bly | +0.270 |
pleased | +0.269 |
irling | +0.268 |
careful | +0.265 |
Neg logit contributions | |
This | -0.322 |
There | -0.321 |
On | -0.308 |
| -0.304 |
To | -0.304 |
Token | a |
Token ID | 64 |
Feature activation | -8.16e+00 |
Pos logit contributions | |
irling | +0.511 |
bled | +0.448 |
glad | +0.436 |
thirst | +0.430 |
gigg | +0.427 |
Neg logit contributions | |
to | -0.551 |
ork | -0.514 |
To | -0.513 |
ride | -0.480 |
ray | -0.475 |
Token | taught |
Token ID | 7817 |
Feature activation | -2.39e+00 |
Pos logit contributions | |
bly | +0.236 |
anut | +0.233 |
bled | +0.225 |
years | +0.223 |
irling | +0.215 |
Neg logit contributions | |
to | -0.190 |
ride | -0.188 |
There | -0.187 |
This | -0.182 |
To | -0.176 |
Token | her |
Token ID | 607 |
Feature activation | -4.68e+00 |
Pos logit contributions | |
anut | +0.075 |
impressed | +0.073 |
bly | +0.071 |
bled | +0.071 |
irling | +0.071 |
Neg logit contributions | |
This | -0.052 |
There | -0.052 |
Circ | -0.050 |
To | -0.050 |
Where | -0.049 |
Token | that |
Token ID | 326 |
Feature activation | +2.94e+00 |
Pos logit contributions | |
impressed | +0.128 |
anut | +0.128 |
i | +0.127 |
careful | +0.123 |
bly | +0.120 |
Neg logit contributions | |
There | -0.119 |
Circ | -0.115 |
This | -0.113 |
Across | -0.111 |
Where | -0.109 |
Token | it |
Token ID | 340 |
Feature activation | +4.96e+00 |
Pos logit contributions | |
There | +0.075 |
This | +0.074 |
Circ | +0.072 |
Across | +0.072 |
On | +0.071 |
Neg logit contributions | |
i | -0.079 |
anut | -0.078 |
impressed | -0.077 |
careful | -0.074 |
glad | -0.072 |
Token | it |
Token ID | 340 |
Feature activation | +1.43e+00 |
Pos logit contributions | |
There | +0.073 |
Across | +0.070 |
Circ | +0.069 |
ork | +0.069 |
To | +0.068 |
Neg logit contributions | |
i | -0.073 |
impressed | -0.073 |
anut | -0.072 |
gigg | -0.071 |
careful | -0.071 |
Token | and |
Token ID | 290 |
Feature activation | -1.06e+01 |
Pos logit contributions | |
There | +0.034 |
Circ | +0.033 |
This | +0.032 |
Where | +0.032 |
Across | +0.031 |
Neg logit contributions | |
anut | -0.041 |
i | -0.041 |
impressed | -0.040 |
careful | -0.039 |
bled | -0.039 |
Token | said |
Token ID | 531 |
Feature activation | -8.59e+00 |
Pos logit contributions | |
anut | +0.233 |
i | +0.221 |
impressed | +0.214 |
bly | +0.208 |
bled | +0.208 |
Neg logit contributions | |
There | -0.332 |
This | -0.327 |
Circ | -0.319 |
To | -0.315 |
ride | -0.315 |
Token | , |
Token ID | 11 |
Feature activation | -1.05e+01 |
Pos logit contributions | |
anut | +0.237 |
i | +0.224 |
impressed | +0.215 |
bly | +0.214 |
years | +0.213 |
Neg logit contributions | |
There | -0.224 |
This | -0.223 |
Where | -0.211 |
To | -0.210 |
Circ | -0.208 |
Token | " |
Token ID | 366 |
Feature activation | +2.32e+00 |
Pos logit contributions | |
impressed | +0.392 |
anut | +0.390 |
i | +0.383 |
glad | +0.377 |
pleased | +0.373 |
Neg logit contributions | |
Circ | -0.151 |
There | -0.146 |
This | -0.140 |
growth | -0.139 |
Across | -0.137 |
Token | E |
Token ID | 36 |
Feature activation | -2.01e+01 |
Pos logit contributions | |
There | +0.050 |
This | +0.048 |
university | +0.047 |
To | +0.046 |
Circ | +0.046 |
Neg logit contributions | |
anut | -0.074 |
i | -0.072 |
impressed | -0.070 |
bly | -0.069 |
bled | -0.068 |
Token | w |
Token ID | 86 |
Feature activation | -8.57e+00 |
Pos logit contributions | |
impressed | +0.421 |
anut | +0.404 |
years | +0.393 |
pleased | +0.385 |
glad | +0.380 |
Neg logit contributions | |
This | -0.651 |
There | -0.625 |
ride | -0.624 |
Circ | -0.617 |
| -0.615 |
Token | , |
Token ID | 11 |
Feature activation | -8.82e+00 |
Pos logit contributions | |
anut | +0.239 |
years | +0.230 |
impressed | +0.228 |
bly | +0.219 |
i | +0.215 |
Neg logit contributions | |
This | -0.213 |
There | -0.208 |
ride | -0.199 |
to | -0.196 |
To | -0.195 |
Token | Mom |
Token ID | 11254 |
Feature activation | -7.74e+00 |
Pos logit contributions | |
anut | +0.255 |
i | +0.250 |
impressed | +0.243 |
bly | +0.236 |
glad | +0.234 |
Neg logit contributions | |
There | -0.206 |
Circ | -0.206 |
Across | -0.200 |
On | -0.200 |
This | -0.197 |
Token | my |
Token ID | 1820 |
Feature activation | -7.41e+00 |
Pos logit contributions | |
anut | +0.208 |
impressed | +0.197 |
i | +0.194 |
bled | +0.194 |
bly | +0.193 |
Neg logit contributions | |
There | -0.204 |
This | -0.203 |
ride | -0.193 |
To | -0.190 |
Where | -0.189 |
Token | , |
Token ID | 11 |
Feature activation | -8.44e+00 |
Pos logit contributions | |
anut | +0.200 |
i | +0.193 |
impressed | +0.189 |
bled | +0.183 |
bly | +0.183 |
Neg logit contributions | |
There | -0.193 |
This | -0.190 |
Circ | -0.184 |
To | -0.181 |
Where | -0.181 |
Token | the |
Token ID | 262 |
Feature activation | +4.99e+00 |
Pos logit contributions | |
There | +0.176 |
Circ | +0.167 |
This | +0.166 |
Across | +0.163 |
Where | +0.159 |
Neg logit contributions | |
anut | -0.292 |
i | -0.287 |
impressed | -0.277 |
bly | -0.273 |
bled | -0.271 |
Token | new |
Token ID | 649 |
Feature activation | -2.74e+00 |
Pos logit contributions | |
There | +0.103 |
This | +0.099 |
Circ | +0.097 |
Where | +0.095 |
Across | +0.093 |
Neg logit contributions | |
anut | -0.161 |
i | -0.156 |
impressed | -0.153 |
careful | -0.151 |
bly | -0.151 |
Token | bed |
Token ID | 3996 |
Feature activation | +4.49e+00 |
Pos logit contributions | |
anut | +0.085 |
i | +0.085 |
careful | +0.081 |
impressed | +0.080 |
bly | +0.079 |
Neg logit contributions | |
There | -0.060 |
This | -0.056 |
Circ | -0.055 |
Where | -0.055 |
Across | -0.054 |
Token | . |
Token ID | 13 |
Feature activation | +4.65e+00 |
Pos logit contributions | |
Circ | +0.087 |
There | +0.087 |
This | +0.083 |
Where | +0.082 |
Across | +0.081 |
Neg logit contributions | |
anut | -0.147 |
i | -0.145 |
impressed | -0.142 |
bly | -0.141 |
bled | -0.140 |
Token | M |
Token ID | 337 |
Feature activation | -1.10e+01 |
Pos logit contributions | |
ride | +0.087 |
hike | +0.086 |
Circ | +0.085 |
university | +0.080 |
shortcut | +0.079 |
Neg logit contributions | |
glad | -0.148 |
impressed | -0.145 |
i | -0.144 |
careful | -0.143 |
pleased | -0.142 |
Token | omm |
Token ID | 2002 |
Feature activation | -1.99e+01 |
Pos logit contributions | |
impressed | +0.246 |
anut | +0.230 |
irling | +0.224 |
careful | +0.219 |
pleased | +0.218 |
Neg logit contributions | |
There | -0.350 |
This | -0.349 |
Where | -0.334 |
To | -0.331 |
On | -0.330 |
Token | a |
Token ID | 64 |
Feature activation | -7.87e+00 |
Pos logit contributions | |
irling | +0.502 |
anut | +0.468 |
bled | +0.435 |
glad | +0.430 |
itching | +0.426 |
Neg logit contributions | |
to | -0.532 |
To | -0.510 |
ride | -0.485 |
ork | -0.481 |
ray | -0.460 |
Token | smiled |
Token ID | 13541 |
Feature activation | -1.02e+01 |
Pos logit contributions | |
anut | +0.208 |
impressed | +0.192 |
i | +0.191 |
bly | +0.190 |
bled | +0.190 |
Neg logit contributions | |
There | -0.211 |
This | -0.205 |
ride | -0.202 |
To | -0.200 |
Circ | -0.199 |
Token | and |
Token ID | 290 |
Feature activation | -7.84e+00 |
Pos logit contributions | |
anut | +0.330 |
i | +0.298 |
irling | +0.298 |
years | +0.297 |
impressed | +0.294 |
Neg logit contributions | |
There | -0.231 |
to | -0.225 |
This | -0.224 |
To | -0.214 |
Across | -0.212 |
Token | said |
Token ID | 531 |
Feature activation | -6.03e+00 |
Pos logit contributions | |
anut | +0.188 |
years | +0.172 |
irling | +0.167 |
bly | +0.166 |
ising | +0.165 |
Neg logit contributions | |
There | -0.232 |
ride | -0.231 |
To | -0.226 |
This | -0.226 |
to | -0.224 |
Token | , |
Token ID | 11 |
Feature activation | -8.63e+00 |
Pos logit contributions | |
anut | +0.127 |
i | +0.127 |
impressed | +0.123 |
bled | +0.118 |
bly | +0.118 |
Neg logit contributions | |
There | -0.186 |
Circ | -0.183 |
This | -0.179 |
ride | -0.179 |
Across | -0.178 |
Token | , |
Token ID | 11 |
Feature activation | -8.32e+00 |
Pos logit contributions | |
anut | +0.293 |
i | +0.277 |
impressed | +0.275 |
bled | +0.267 |
bly | +0.266 |
Neg logit contributions | |
There | -0.321 |
This | -0.317 |
Circ | -0.303 |
To | -0.302 |
Where | -0.301 |
Token | we |
Token ID | 356 |
Feature activation | +3.13e+00 |
Pos logit contributions | |
anut | +0.246 |
i | +0.233 |
impressed | +0.228 |
bled | +0.228 |
years | +0.226 |
Neg logit contributions | |
There | -0.203 |
This | -0.197 |
To | -0.187 |
ride | -0.187 |
Where | -0.187 |
Token | will |
Token ID | 481 |
Feature activation | -5.54e-01 |
Pos logit contributions | |
Circ | +0.060 |
There | +0.060 |
This | +0.060 |
Across | +0.057 |
Where | +0.055 |
Neg logit contributions | |
anut | -0.104 |
i | -0.103 |
impressed | -0.099 |
careful | -0.097 |
bly | -0.096 |
Token | get |
Token ID | 651 |
Feature activation | +2.39e+00 |
Pos logit contributions | |
anut | +0.019 |
i | +0.018 |
impressed | +0.018 |
bled | +0.017 |
bly | +0.017 |
Neg logit contributions | |
There | -0.011 |
This | -0.011 |
Circ | -0.010 |
To | -0.010 |
ride | -0.010 |
Token | close |
Token ID | 1969 |
Feature activation | -3.24e+00 |
Pos logit contributions | |
There | +0.055 |
This | +0.055 |
Circ | +0.053 |
ride | +0.052 |
To | +0.052 |
Neg logit contributions | |
anut | -0.071 |
i | -0.068 |
impressed | -0.068 |
bled | -0.066 |
irling | -0.066 |
Token | to |
Token ID | 284 |
Feature activation | +8.04e+00 |
Pos logit contributions | |
anut | +0.143 |
i | +0.142 |
impressed | +0.139 |
careful | +0.136 |
bled | +0.136 |
Neg logit contributions | |
There | -0.028 |
This | -0.027 |
Circ | -0.025 |
Across | -0.023 |
Where | -0.023 |
Token | him |
Token ID | 683 |
Feature activation | -3.41e+00 |
Pos logit contributions | |
There | +0.191 |
Across | +0.184 |
This | +0.179 |
Circ | +0.177 |
On | +0.176 |
Neg logit contributions | |
anut | -0.240 |
i | -0.238 |
impressed | -0.229 |
glad | -0.226 |
careful | -0.222 |
Token | soon |
Token ID | 2582 |
Feature activation | -6.78e+00 |
Pos logit contributions | |
i | +0.100 |
anut | +0.099 |
careful | +0.094 |
impressed | +0.094 |
glad | +0.090 |
Neg logit contributions | |
Circ | -0.079 |
There | -0.079 |
This | -0.075 |
Across | -0.074 |
growth | -0.074 |
Token | ." |
Token ID | 526 |
Feature activation | +4.11e+00 |
Pos logit contributions | |
i | +0.183 |
careful | +0.170 |
anut | +0.168 |
ising | +0.166 |
ies | +0.164 |
Neg logit contributions | |
Circ | -0.189 |
growth | -0.171 |
Across | -0.167 |
There | -0.166 |
Where | -0.165 |
Token | |
Token ID | 220 |
Feature activation | -1.33e+00 |
Pos logit contributions | |
Circ | +0.074 |
ride | +0.071 |
university | +0.070 |
hike | +0.070 |
Across | +0.069 |
Neg logit contributions | |
anut | -0.134 |
impressed | -0.133 |
glad | -0.131 |
i | -0.131 |
thankful | -0.128 |
Token |
|
Token ID | 198 |
Feature activation | +4.20e+00 |
Pos logit contributions | |
glad | +0.041 |
impressed | +0.040 |
anut | +0.039 |
bled | +0.038 |
careful | +0.038 |
Neg logit contributions | |
Circ | -0.028 |
ride | -0.027 |
hike | -0.027 |
growth | -0.027 |
shortcut | -0.026 |
Token | " |
Token ID | 366 |
Feature activation | +4.31e+00 |
Pos logit contributions | |
impressed | +0.285 |
anut | +0.285 |
i | +0.277 |
bled | +0.275 |
glad | +0.274 |
Neg logit contributions | |
Circ | -0.112 |
There | -0.109 |
growth | -0.105 |
This | -0.104 |
university | -0.103 |
Token | Hi |
Token ID | 17250 |
Feature activation | -4.60e+00 |
Pos logit contributions | |
There | +0.086 |
This | +0.082 |
university | +0.079 |
To | +0.078 |
Across | +0.078 |
Neg logit contributions | |
anut | -0.141 |
i | -0.138 |
impressed | -0.135 |
bly | -0.133 |
years | -0.132 |
Token | , |
Token ID | 11 |
Feature activation | -7.46e+00 |
Pos logit contributions | |
i | +0.093 |
impressed | +0.085 |
anut | +0.085 |
maid | +0.082 |
careful | +0.079 |
Neg logit contributions | |
growth | -0.152 |
Circ | -0.148 |
There | -0.147 |
To | -0.144 |
This | -0.144 |
Token | can |
Token ID | 460 |
Feature activation | +3.11e+00 |
Pos logit contributions | |
glad | +0.215 |
impressed | +0.214 |
anut | +0.209 |
careful | +0.206 |
i | +0.206 |
Neg logit contributions | |
growth | -0.173 |
Circ | -0.171 |
ork | -0.166 |
Across | -0.166 |
On | -0.162 |
Token | I |
Token ID | 314 |
Feature activation | +4.62e+00 |
Pos logit contributions | |
Circ | +0.068 |
growth | +0.064 |
Across | +0.064 |
There | +0.063 |
ork | +0.063 |
Neg logit contributions | |
i | -0.091 |
anut | -0.090 |
impressed | -0.090 |
bly | -0.089 |
glad | -0.087 |
Token | play |
Token ID | 711 |
Feature activation | +7.51e+00 |
Pos logit contributions | |
There | +0.079 |
Circ | +0.076 |
This | +0.075 |
Across | +0.073 |
Where | +0.071 |
Neg logit contributions | |
anut | -0.164 |
i | -0.164 |
impressed | -0.158 |
careful | -0.156 |
bly | -0.154 |
Token | too |
Token ID | 1165 |
Feature activation | -2.92e+00 |
Pos logit contributions | |
There | +0.211 |
growth | +0.198 |
Across | +0.197 |
Circ | +0.195 |
This | +0.194 |
Neg logit contributions | |
impressed | -0.185 |
anut | -0.185 |
bly | -0.180 |
careful | -0.175 |
irling | -0.175 |
Token | ?" |
Token ID | 1701 |
Feature activation | +6.16e+00 |
Pos logit contributions | |
i | +0.079 |
impressed | +0.078 |
anut | +0.077 |
glad | +0.075 |
bly | +0.075 |
Neg logit contributions | |
There | -0.069 |
Circ | -0.068 |
growth | -0.068 |
This | -0.066 |
Across | -0.064 |
Token | One |
Token ID | 1881 |
Feature activation | +2.92e-01 |
Pos logit contributions | |
shortcut | +0.140 |
Circ | +0.139 |
exit | +0.135 |
Across | +0.131 |
hike | +0.131 |
Neg logit contributions | |
gigg | -0.169 |
i | -0.167 |
anut | -0.166 |
impressed | -0.166 |
glad | -0.166 |
Token | of |
Token ID | 286 |
Feature activation | +6.44e+00 |
Pos logit contributions | |
There | +0.005 |
This | +0.005 |
Circ | +0.005 |
To | +0.005 |
Where | +0.005 |
Neg logit contributions | |
anut | -0.010 |
i | -0.010 |
impressed | -0.010 |
careful | -0.010 |
bly | -0.010 |
Token | the |
Token ID | 262 |
Feature activation | +5.58e+00 |
Pos logit contributions | |
There | +0.110 |
This | +0.107 |
To | +0.099 |
Where | +0.097 |
Circ | +0.097 |
Neg logit contributions | |
anut | -0.234 |
impressed | -0.225 |
i | -0.225 |
ushed | -0.221 |
irling | -0.218 |
Token | car |
Token ID | 1097 |
Feature activation | +6.53e+00 |
Pos logit contributions | |
There | +0.053 |
This | +0.052 |
ride | +0.052 |
To | +0.050 |
Circ | +0.050 |
Neg logit contributions | |
anut | -0.068 |
i | -0.065 |
impressed | -0.063 |
bly | -0.062 |
years | -0.061 |
Token | went |
Token ID | 1816 |
Feature activation | +3.22e+00 |
Pos logit contributions | |
There | +0.160 |
ride | +0.158 |
This | +0.156 |
Circ | +0.150 |
To | +0.149 |
Neg logit contributions | |
anut | -0.192 |
i | -0.180 |
impressed | -0.177 |
bly | -0.173 |
years | -0.173 |
Token | for |
Token ID | 329 |
Feature activation | +5.12e+00 |
Pos logit contributions | |
There | +0.069 |
This | +0.067 |
Circ | +0.066 |
Where | +0.065 |
Across | +0.065 |
Neg logit contributions | |
anut | -0.100 |
i | -0.098 |
impressed | -0.097 |
bled | -0.095 |
bly | -0.095 |
Token | an |
Token ID | 281 |
Feature activation | -1.62e+00 |
Pos logit contributions | |
There | +0.088 |
This | +0.087 |
university | +0.084 |
ride | +0.084 |
To | +0.084 |
Neg logit contributions | |
anut | -0.186 |
i | -0.183 |
impressed | -0.177 |
ushed | -0.177 |
bled | -0.175 |
Token | amazing |
Token ID | 4998 |
Feature activation | -5.89e+00 |
Pos logit contributions | |
anut | +0.061 |
i | +0.059 |
ushed | +0.059 |
bled | +0.058 |
impressed | +0.058 |
Neg logit contributions | |
ride | -0.027 |
There | -0.024 |
This | -0.024 |
university | -0.023 |
visit | -0.023 |
Token | ride |
Token ID | 6594 |
Feature activation | +9.46e+00 |
Pos logit contributions | |
careful | +0.176 |
anut | +0.174 |
impressed | +0.173 |
i | +0.172 |
ising | +0.170 |
Neg logit contributions | |
There | -0.130 |
Where | -0.120 |
This | -0.119 |
Across | -0.118 |
Circ | -0.114 |
Token | around |
Token ID | 1088 |
Feature activation | +1.03e+01 |
Pos logit contributions | |
There | +0.223 |
This | +0.219 |
Circ | +0.212 |
To | +0.207 |
Where | +0.207 |
Neg logit contributions | |
anut | -0.278 |
i | -0.269 |
impressed | -0.264 |
bled | -0.257 |
bly | -0.256 |
Token | the |
Token ID | 262 |
Feature activation | +6.29e+00 |
Pos logit contributions | |
There | +0.216 |
This | +0.208 |
Circ | +0.204 |
Across | +0.202 |
Where | +0.198 |
Neg logit contributions | |
anut | -0.329 |
i | -0.324 |
impressed | -0.321 |
bled | -0.310 |
years | -0.309 |
Token | park |
Token ID | 3952 |
Feature activation | +7.74e+00 |
Pos logit contributions | |
There | +0.118 |
This | +0.115 |
Circ | +0.110 |
Across | +0.108 |
On | +0.107 |
Neg logit contributions | |
careful | -0.211 |
impressed | -0.211 |
anut | -0.208 |
glad | -0.205 |
ising | -0.201 |
Token | and |
Token ID | 290 |
Feature activation | -6.28e+00 |
Pos logit contributions | |
There | +0.177 |
This | +0.171 |
Circ | +0.170 |
Across | +0.165 |
Where | +0.165 |
Neg logit contributions | |
anut | -0.231 |
i | -0.226 |
impressed | -0.222 |
bled | -0.214 |
careful | -0.213 |
Token | played |
Token ID | 2826 |
Feature activation | +5.16e+00 |
Pos logit contributions | |
anut | +0.189 |
bly | +0.180 |
i | +0.180 |
ising | +0.173 |
impressed | +0.172 |
Neg logit contributions | |
ride | -0.150 |
There | -0.147 |
This | -0.146 |
to | -0.141 |
To | -0.140 |
Token | scattered |
Token ID | 16830 |
Feature activation | -2.97e-01 |
Pos logit contributions | |
There | +0.089 |
This | +0.088 |
Circ | +0.086 |
To | +0.084 |
university | +0.084 |
Neg logit contributions | |
anut | -0.094 |
i | -0.092 |
impressed | -0.090 |
bled | -0.088 |
bly | -0.088 |
Token | blocks |
Token ID | 7021 |
Feature activation | +9.15e+00 |
Pos logit contributions | |
anut | +0.009 |
i | +0.008 |
bled | +0.008 |
impressed | +0.008 |
bly | +0.008 |
Neg logit contributions | |
This | -0.007 |
There | -0.007 |
Circ | -0.007 |
ride | -0.006 |
Across | -0.006 |
Token | all |
Token ID | 477 |
Feature activation | +2.57e+00 |
Pos logit contributions | |
Circ | +0.250 |
This | +0.244 |
Where | +0.243 |
There | +0.241 |
Across | +0.233 |
Neg logit contributions | |
impressed | -0.230 |
i | -0.230 |
bled | -0.229 |
anut | -0.228 |
careful | -0.224 |
Token | over |
Token ID | 625 |
Feature activation | +5.34e+00 |
Pos logit contributions | |
There | +0.052 |
Circ | +0.050 |
This | +0.050 |
Where | +0.049 |
Across | +0.049 |
Neg logit contributions | |
anut | -0.082 |
i | -0.081 |
impressed | -0.080 |
bly | -0.078 |
bled | -0.078 |
Token | the |
Token ID | 262 |
Feature activation | +4.43e+00 |
Pos logit contributions | |
There | +0.111 |
This | +0.111 |
Circ | +0.106 |
Across | +0.106 |
ride | +0.105 |
Neg logit contributions | |
anut | -0.170 |
i | -0.167 |
impressed | -0.162 |
years | -0.159 |
careful | -0.159 |
Token | floor |
Token ID | 4314 |
Feature activation | +7.52e+00 |
Pos logit contributions | |
Circ | +0.110 |
This | +0.109 |
There | +0.107 |
Where | +0.107 |
Across | +0.105 |
Neg logit contributions | |
anut | -0.116 |
careful | -0.116 |
impressed | -0.112 |
glad | -0.108 |
gigg | -0.104 |
Token | . |
Token ID | 13 |
Feature activation | +4.43e+00 |
Pos logit contributions | |
There | +0.132 |
This | +0.129 |
Circ | +0.126 |
Where | +0.121 |
Across | +0.120 |
Neg logit contributions | |
anut | -0.265 |
i | -0.259 |
impressed | -0.256 |
bled | -0.249 |
bly | -0.248 |
Token | It |
Token ID | 632 |
Feature activation | +3.20e+00 |
Pos logit contributions | |
ride | +0.087 |
hike | +0.082 |
Circ | +0.079 |
shortcut | +0.076 |
visit | +0.076 |
Neg logit contributions | |
impressed | -0.143 |
glad | -0.140 |
bled | -0.139 |
anut | -0.136 |
careful | -0.135 |
Token | hit |
Token ID | 2277 |
Feature activation | +3.29e+00 |
Pos logit contributions | |
There | +0.077 |
This | +0.076 |
Circ | +0.073 |
To | +0.072 |
Where | +0.072 |
Neg logit contributions | |
anut | -0.093 |
i | -0.090 |
impressed | -0.088 |
bled | -0.086 |
bly | -0.085 |
Token | Ben |
Token ID | 3932 |
Feature activation | -6.74e+00 |
Pos logit contributions | |
There | +0.069 |
This | +0.067 |
Circ | +0.066 |
ride | +0.065 |
Across | +0.064 |
Neg logit contributions | |
anut | -0.105 |
i | -0.103 |
impressed | -0.099 |
bled | -0.099 |
bly | -0.098 |
Token | on |
Token ID | 319 |
Feature activation | +8.50e+00 |
Pos logit contributions | |
anut | +0.192 |
i | +0.188 |
careful | +0.181 |
impressed | +0.180 |
bly | +0.177 |
Neg logit contributions | |
There | -0.167 |
Circ | -0.163 |
This | -0.157 |
Where | -0.157 |
Across | -0.154 |
Token | blocks |
Token ID | 7021 |
Feature activation | +9.02e+00 |
Pos logit contributions | |
anut | +0.048 |
i | +0.047 |
impressed | +0.047 |
careful | +0.045 |
irling | +0.044 |
Neg logit contributions | |
This | -0.036 |
There | -0.036 |
Circ | -0.035 |
To | -0.034 |
Where | -0.034 |
Token | , |
Token ID | 11 |
Feature activation | -8.40e+00 |
Pos logit contributions | |
Circ | +0.215 |
Where | +0.205 |
This | +0.203 |
There | +0.201 |
Across | +0.198 |
Neg logit contributions | |
i | -0.272 |
anut | -0.261 |
impressed | -0.258 |
bled | -0.254 |
careful | -0.253 |
Token | but |
Token ID | 475 |
Feature activation | -5.49e+00 |
Pos logit contributions | |
anut | +0.244 |
i | +0.232 |
impressed | +0.227 |
bly | +0.222 |
ushed | +0.222 |
Neg logit contributions | |
There | -0.214 |
This | -0.205 |
ride | -0.196 |
To | -0.195 |
Across | -0.194 |
Token | it |
Token ID | 340 |
Feature activation | +5.44e+00 |
Pos logit contributions | |
anut | +0.161 |
i | +0.151 |
impressed | +0.151 |
bly | +0.148 |
irling | +0.148 |
Neg logit contributions | |
There | -0.132 |
This | -0.128 |
To | -0.123 |
Where | -0.123 |
university | -0.122 |
Token | falls |
Token ID | 8953 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
There | +0.109 |
This | +0.105 |
Circ | +0.100 |
Across | +0.099 |
ride | +0.098 |
Neg logit contributions | |
anut | -0.182 |
impressed | -0.174 |
i | -0.174 |
bly | -0.169 |
years | -0.169 |
Token | down |
Token ID | 866 |
Feature activation | +1.13e+01 |
Pos logit contributions | |
i | +0.039 |
bled | +0.038 |
bly | +0.038 |
gigg | +0.037 |
broken | +0.037 |
Neg logit contributions | |
Circ | -0.032 |
There | -0.032 |
university | -0.031 |
This | -0.031 |
Where | -0.031 |
Token | . |
Token ID | 13 |
Feature activation | +4.46e+00 |
Pos logit contributions | |
There | +0.233 |
This | +0.226 |
Circ | +0.225 |
Where | +0.217 |
Across | +0.214 |
Neg logit contributions | |
anut | -0.351 |
i | -0.349 |
impressed | -0.347 |
bly | -0.339 |
bled | -0.336 |
Token | She |
Token ID | 1375 |
Feature activation | -4.54e+00 |
Pos logit contributions | |
ride | +0.101 |
hike | +0.100 |
shortcut | +0.095 |
hunt | +0.092 |
Circ | +0.091 |
Neg logit contributions | |
glad | -0.126 |
impressed | -0.123 |
gigg | -0.120 |
careful | -0.119 |
years | -0.118 |
Token | tries |
Token ID | 8404 |
Feature activation | +2.75e+00 |
Pos logit contributions | |
anut | +0.138 |
impressed | +0.132 |
i | +0.131 |
bly | +0.128 |
bled | +0.128 |
Neg logit contributions | |
There | -0.105 |
This | -0.102 |
Circ | -0.097 |
Where | -0.096 |
ride | -0.096 |
Token | again |
Token ID | 757 |
Feature activation | +2.13e+00 |
Pos logit contributions | |
There | +0.031 |
Circ | +0.028 |
Across | +0.027 |
This | +0.027 |
growth | +0.026 |
Neg logit contributions | |
careful | -0.113 |
i | -0.113 |
impressed | -0.113 |
anut | -0.113 |
gigg | -0.112 |
Token | , |
Token ID | 11 |
Feature activation | -7.85e+00 |
Pos logit contributions | |
There | +0.063 |
This | +0.061 |
Circ | +0.061 |
Across | +0.060 |
Where | +0.059 |
Neg logit contributions | |
anut | -0.049 |
i | -0.049 |
impressed | -0.048 |
careful | -0.046 |
ising | -0.044 |
Token | to |
Token ID | 284 |
Feature activation | +8.91e+00 |
Pos logit contributions | |
Across | +0.002 |
There | +0.002 |
On | +0.002 |
exit | +0.002 |
This | +0.002 |
Neg logit contributions | |
anut | -0.006 |
i | -0.006 |
careful | -0.006 |
gigg | -0.006 |
years | -0.006 |
Token | run |
Token ID | 1057 |
Feature activation | +1.15e+00 |
Pos logit contributions | |
There | +0.203 |
Across | +0.192 |
This | +0.188 |
Circ | +0.188 |
Where | +0.182 |
Neg logit contributions | |
anut | -0.276 |
i | -0.267 |
impressed | -0.256 |
careful | -0.255 |
gigg | -0.254 |
Token | away |
Token ID | 1497 |
Feature activation | +1.75e+00 |
Pos logit contributions | |
There | +0.032 |
This | +0.031 |
Across | +0.030 |
Circ | +0.030 |
Where | +0.030 |
Neg logit contributions | |
anut | -0.028 |
i | -0.028 |
impressed | -0.027 |
careful | -0.026 |
bly | -0.026 |
Token | . |
Token ID | 13 |
Feature activation | +5.49e+00 |
Pos logit contributions | |
There | +0.040 |
This | +0.039 |
Circ | +0.038 |
To | +0.037 |
ride | +0.037 |
Neg logit contributions | |
anut | -0.053 |
i | -0.051 |
impressed | -0.050 |
bled | -0.049 |
bly | -0.049 |
Token | He |
Token ID | 679 |
Feature activation | -2.93e+00 |
Pos logit contributions | |
ride | +0.097 |
Circ | +0.096 |
hike | +0.090 |
university | +0.089 |
Across | +0.088 |
Neg logit contributions | |
anut | -0.184 |
impressed | -0.181 |
careful | -0.180 |
glad | -0.179 |
i | -0.177 |
Token | let |
Token ID | 1309 |
Feature activation | +5.48e+00 |
Pos logit contributions | |
anut | +0.083 |
bly | +0.081 |
bled | +0.079 |
impressed | +0.078 |
irling | +0.078 |
Neg logit contributions | |
There | -0.073 |
ride | -0.071 |
to | -0.069 |
To | -0.069 |
Under | -0.069 |
Token | go |
Token ID | 467 |
Feature activation | +3.59e+00 |
Pos logit contributions | |
There | +0.125 |
Circ | +0.123 |
Across | +0.120 |
university | +0.118 |
This | +0.118 |
Neg logit contributions | |
i | -0.162 |
anut | -0.156 |
bled | -0.152 |
bly | -0.144 |
careful | -0.144 |
Token | of |
Token ID | 286 |
Feature activation | +7.00e+00 |
Pos logit contributions | |
There | +0.069 |
Circ | +0.069 |
Across | +0.068 |
This | +0.067 |
growth | +0.066 |
Neg logit contributions | |
anut | -0.116 |
i | -0.114 |
glad | -0.109 |
careful | -0.109 |
impressed | -0.107 |
Token | Mom |
Token ID | 11254 |
Feature activation | -5.91e+00 |
Pos logit contributions | |
There | +0.134 |
This | +0.130 |
Circ | +0.127 |
Across | +0.125 |
Where | +0.123 |
Neg logit contributions | |
anut | -0.238 |
i | -0.232 |
impressed | -0.227 |
bled | -0.221 |
bly | -0.221 |
Token | 's |
Token ID | 338 |
Feature activation | -2.76e+00 |
Pos logit contributions | |
anut | +0.160 |
i | +0.160 |
careful | +0.153 |
gigg | +0.145 |
glad | +0.141 |
Neg logit contributions | |
growth | -0.152 |
Where | -0.150 |
Circ | -0.149 |
There | -0.147 |
Across | -0.146 |
Token | hand |
Token ID | 1021 |
Feature activation | +7.85e+00 |
Pos logit contributions | |
careful | +0.070 |
mad | +0.066 |
anut | +0.066 |
gigg | +0.065 |
glad | +0.065 |
Neg logit contributions | |
Circ | -0.074 |
To | -0.073 |
There | -0.070 |
Across | -0.070 |
On | -0.069 |
Token | very |
Token ID | 845 |
Feature activation | -1.20e+01 |
Pos logit contributions | |
anut | +0.059 |
i | +0.056 |
ushed | +0.055 |
bled | +0.054 |
maid | +0.053 |
Neg logit contributions | |
There | -0.078 |
This | -0.078 |
to | -0.075 |
ride | -0.075 |
university | -0.074 |
Token | nice |
Token ID | 3621 |
Feature activation | -7.20e+00 |
Pos logit contributions | |
anut | +0.277 |
i | +0.271 |
bled | +0.271 |
maid | +0.263 |
ishly | +0.260 |
Neg logit contributions | |
to | -0.350 |
There | -0.349 |
This | -0.347 |
ride | -0.346 |
university | -0.342 |
Token | . |
Token ID | 13 |
Feature activation | +4.87e+00 |
Pos logit contributions | |
i | +0.187 |
anut | +0.183 |
impressed | +0.180 |
careful | +0.176 |
glad | +0.172 |
Neg logit contributions | |
There | -0.192 |
This | -0.190 |
Circ | -0.189 |
Across | -0.185 |
To | -0.182 |
Token | Every |
Token ID | 3887 |
Feature activation | +5.35e-01 |
Pos logit contributions | |
shortcut | +0.119 |
ork | +0.117 |
hike | +0.108 |
Circ | +0.107 |
hunt | +0.102 |
Neg logit contributions | |
glad | -0.135 |
gigg | -0.127 |
i | -0.121 |
then | -0.120 |
happy | -0.120 |
Token | day |
Token ID | 1110 |
Feature activation | +3.33e+00 |
Pos logit contributions | |
There | +0.014 |
Circ | +0.014 |
This | +0.013 |
To | +0.013 |
Where | +0.013 |
Neg logit contributions | |
i | -0.014 |
careful | -0.013 |
glad | -0.013 |
thankful | -0.013 |
pleased | -0.012 |
Token | she |
Token ID | 673 |
Feature activation | -2.48e+00 |
Pos logit contributions | |
Circ | +0.097 |
There | +0.095 |
This | +0.094 |
Across | +0.093 |
growth | +0.091 |
Neg logit contributions | |
i | -0.079 |
anut | -0.072 |
careful | -0.071 |
impressed | -0.071 |
glad | -0.070 |
Token | went |
Token ID | 1816 |
Feature activation | +1.82e+00 |
Pos logit contributions | |
i | +0.071 |
anut | +0.070 |
impressed | +0.066 |
careful | +0.065 |
gigg | +0.065 |
Neg logit contributions | |
This | -0.062 |
There | -0.060 |
Where | -0.059 |
Circ | -0.057 |
Across | -0.057 |
Token | to |
Token ID | 284 |
Feature activation | +7.62e+00 |
Pos logit contributions | |
growth | +0.037 |
Where | +0.034 |
There | +0.034 |
Circ | +0.032 |
CA | +0.031 |
Neg logit contributions | |
careful | -0.060 |
proud | -0.060 |
i | -0.059 |
patiently | -0.059 |
glad | -0.058 |
Token | work |
Token ID | 670 |
Feature activation | +1.40e+00 |
Pos logit contributions | |
ork | +0.241 |
There | +0.237 |
Across | +0.232 |
Where | +0.230 |
ase | +0.230 |
Neg logit contributions | |
happy | -0.164 |
glad | -0.151 |
gigg | -0.149 |
careful | -0.148 |
proud | -0.145 |
Token | and |
Token ID | 290 |
Feature activation | -7.75e+00 |
Pos logit contributions | |
There | +0.037 |
Where | +0.035 |
Circ | +0.035 |
This | +0.035 |
Across | +0.034 |
Neg logit contributions | |
anut | -0.037 |
i | -0.037 |
impressed | -0.037 |
careful | -0.036 |
irling | -0.035 |
Token | made |
Token ID | 925 |
Feature activation | +3.17e+00 |
Pos logit contributions | |
anut | +0.204 |
i | +0.204 |
impressed | +0.198 |
careful | +0.198 |
gigg | +0.191 |
Neg logit contributions | |
There | -0.201 |
This | -0.196 |
Circ | -0.195 |
Where | -0.190 |
Across | -0.189 |
Token | to |
Token ID | 284 |
Feature activation | +9.27e+00 |
Pos logit contributions | |
There | +0.047 |
Where | +0.046 |
This | +0.046 |
Circ | +0.046 |
growth | +0.045 |
Neg logit contributions | |
anut | -0.137 |
i | -0.136 |
careful | -0.136 |
glad | -0.134 |
gigg | -0.133 |
Token | the |
Token ID | 262 |
Feature activation | +6.06e+00 |
Pos logit contributions | |
Across | +0.228 |
There | +0.226 |
Circ | +0.217 |
ork | +0.216 |
To | +0.215 |
Neg logit contributions | |
glad | -0.239 |
gigg | -0.236 |
impressed | -0.229 |
anut | -0.227 |
i | -0.226 |
Token | store |
Token ID | 3650 |
Feature activation | +6.08e+00 |
Pos logit contributions | |
There | +0.133 |
This | +0.129 |
Circ | +0.125 |
Across | +0.121 |
growth | +0.118 |
Neg logit contributions | |
anut | -0.183 |
careful | -0.179 |
impressed | -0.173 |
i | -0.172 |
glad | -0.171 |
Token | and |
Token ID | 290 |
Feature activation | -6.09e+00 |
Pos logit contributions | |
There | +0.124 |
This | +0.120 |
Circ | +0.119 |
Across | +0.116 |
Where | +0.116 |
Neg logit contributions | |
anut | -0.198 |
i | -0.193 |
impressed | -0.192 |
careful | -0.185 |
glad | -0.182 |
Token | found |
Token ID | 1043 |
Feature activation | +4.14e+00 |
Pos logit contributions | |
anut | +0.180 |
i | +0.174 |
impressed | +0.170 |
careful | +0.164 |
years | +0.164 |
Neg logit contributions | |
There | -0.140 |
This | -0.139 |
Circ | -0.136 |
Across | -0.132 |
Where | -0.131 |
Token | a |
Token ID | 257 |
Feature activation | +2.78e+00 |
Pos logit contributions | |
There | +0.078 |
This | +0.076 |
Circ | +0.073 |
To | +0.071 |
Where | +0.071 |
Neg logit contributions | |
anut | -0.143 |
i | -0.139 |
impressed | -0.137 |
bly | -0.133 |
bled | -0.133 |
Token | soft |
Token ID | 2705 |
Feature activation | -6.38e+00 |
Pos logit contributions | |
There | +0.072 |
This | +0.071 |
Circ | +0.068 |
Across | +0.067 |
Where | +0.066 |
Neg logit contributions | |
anut | -0.075 |
impressed | -0.073 |
i | -0.072 |
careful | -0.071 |
glad | -0.069 |
Token | and |
Token ID | 290 |
Feature activation | -6.14e+00 |
Pos logit contributions | |
i | +0.197 |
anut | +0.196 |
impressed | +0.188 |
careful | +0.186 |
bly | +0.185 |
Neg logit contributions | |
There | -0.135 |
Circ | -0.129 |
This | -0.128 |
Where | -0.124 |
Across | -0.122 |
Token | cozy |
Token ID | 37438 |
Feature activation | -5.32e+00 |
Pos logit contributions | |
anut | +0.141 |
i | +0.136 |
impressed | +0.128 |
bled | +0.127 |
bly | +0.125 |
Neg logit contributions | |
There | -0.187 |
This | -0.183 |
ride | -0.180 |
Circ | -0.179 |
Across | -0.178 |
Token | bed |
Token ID | 3996 |
Feature activation | +5.87e+00 |
Pos logit contributions | |
anut | +0.168 |
i | +0.167 |
careful | +0.160 |
impressed | +0.159 |
glad | +0.157 |
Neg logit contributions | |
There | -0.110 |
This | -0.106 |
Circ | -0.105 |
Across | -0.101 |
Where | -0.100 |
Token | . |
Token ID | 13 |
Feature activation | +5.96e+00 |
Pos logit contributions | |
Circ | +0.122 |
There | +0.118 |
This | +0.115 |
Where | +0.114 |
Across | +0.112 |
Neg logit contributions | |
anut | -0.185 |
bled | -0.184 |
bly | -0.183 |
i | -0.183 |
impressed | -0.182 |
Token | independent |
Token ID | 4795 |
Feature activation | -7.41e+00 |
Pos logit contributions | |
anut | +0.230 |
i | +0.226 |
maid | +0.221 |
ushed | +0.199 |
ious | +0.186 |
Neg logit contributions | |
ride | -0.350 |
This | -0.344 |
to | -0.337 |
There | -0.336 |
Where | -0.323 |
Token | and |
Token ID | 290 |
Feature activation | -8.59e+00 |
Pos logit contributions | |
i | +0.221 |
anut | +0.220 |
impressed | +0.214 |
bled | +0.207 |
careful | +0.206 |
Neg logit contributions | |
There | -0.168 |
Circ | -0.166 |
This | -0.164 |
Across | -0.159 |
To | -0.157 |
Token | finding |
Token ID | 4917 |
Feature activation | +7.23e-01 |
Pos logit contributions | |
anut | +0.189 |
i | +0.183 |
maid | +0.171 |
bly | +0.160 |
ushed | +0.158 |
Neg logit contributions | |
This | -0.269 |
ride | -0.267 |
There | -0.265 |
to | -0.264 |
To | -0.258 |
Token | a |
Token ID | 257 |
Feature activation | +6.60e-01 |
Pos logit contributions | |
There | +0.018 |
This | +0.017 |
Across | +0.017 |
On | +0.017 |
Circ | +0.017 |
Neg logit contributions | |
anut | -0.020 |
impressed | -0.020 |
i | -0.020 |
careful | -0.020 |
bly | -0.020 |
Token | solution |
Token ID | 4610 |
Feature activation | +3.55e+00 |
Pos logit contributions | |
There | +0.017 |
This | +0.017 |
Across | +0.016 |
Circ | +0.016 |
On | +0.016 |
Neg logit contributions | |
anut | -0.018 |
impressed | -0.018 |
careful | -0.017 |
pleased | -0.017 |
i | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | +3.77e+00 |
Pos logit contributions | |
There | +0.081 |
Circ | +0.079 |
Across | +0.076 |
growth | +0.073 |
This | +0.073 |
Neg logit contributions | |
i | -0.112 |
anut | -0.108 |
impressed | -0.107 |
careful | -0.106 |
bled | -0.103 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +7.26e+00 |
Pos logit contributions | |
Circ | +0.066 |
ride | +0.064 |
university | +0.060 |
hike | +0.060 |
ork | +0.059 |
Neg logit contributions | |
anut | -0.126 |
impressed | -0.125 |
i | -0.124 |
glad | -0.124 |
years | -0.119 |
Token | Mark |
Token ID | 9704 |
Feature activation | -4.95e+00 |
Pos logit contributions | |
university | +0.150 |
hike | +0.148 |
dictionary | +0.147 |
Circ | +0.146 |
ride | +0.146 |
Neg logit contributions | |
aming | -0.194 |
i | -0.192 |
glad | -0.191 |
gigg | -0.189 |
bled | -0.187 |
Token | was |
Token ID | 373 |
Feature activation | -2.83e+00 |
Pos logit contributions | |
anut | +0.141 |
impressed | +0.135 |
bly | +0.134 |
bled | +0.134 |
years | +0.131 |
Neg logit contributions | |
There | -0.120 |
This | -0.118 |
ride | -0.114 |
On | -0.112 |
To | -0.112 |
Token | scared |
Token ID | 12008 |
Feature activation | -7.14e+00 |
Pos logit contributions | |
anut | +0.082 |
i | +0.080 |
maid | +0.077 |
ising | +0.075 |
ushed | +0.075 |
Neg logit contributions | |
There | -0.072 |
This | -0.069 |
to | -0.067 |
On | -0.067 |
ride | -0.067 |
Token | . |
Token ID | 13 |
Feature activation | +4.61e+00 |
Pos logit contributions | |
i | +0.208 |
anut | +0.203 |
impressed | +0.200 |
careful | +0.194 |
bly | +0.193 |
Neg logit contributions | |
Circ | -0.167 |
There | -0.163 |
This | -0.161 |
Where | -0.157 |
Across | -0.155 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +7.24e+00 |
Pos logit contributions | |
hike | +0.077 |
shortcut | +0.071 |
Circ | +0.071 |
ride | +0.070 |
ork | +0.068 |
Neg logit contributions | |
glad | -0.113 |
grateful | -0.103 |
anut | -0.103 |
impressed | -0.103 |
thankful | -0.102 |
Token | Anna |
Token ID | 31160 |
Feature activation | -7.17e+00 |
Pos logit contributions | |
shortcut | +0.153 |
hike | +0.151 |
dictionary | +0.147 |
university | +0.147 |
grade | +0.144 |
Neg logit contributions | |
aming | -0.190 |
gigg | -0.176 |
kins | -0.175 |
i | -0.172 |
bly | -0.170 |
Token | and |
Token ID | 290 |
Feature activation | -7.94e+00 |
Pos logit contributions | |
anut | +0.184 |
i | +0.179 |
impressed | +0.173 |
bled | +0.166 |
bly | +0.166 |
Neg logit contributions | |
There | -0.194 |
This | -0.191 |
Circ | -0.187 |
Across | -0.183 |
Where | -0.183 |
Token | Ben |
Token ID | 3932 |
Feature activation | -6.93e+00 |
Pos logit contributions | |
i | +0.229 |
anut | +0.225 |
glad | +0.215 |
impressed | +0.210 |
careful | +0.209 |
Neg logit contributions | |
Circ | -0.186 |
There | -0.180 |
ork | -0.179 |
Across | -0.179 |
This | -0.177 |
Token | are |
Token ID | 389 |
Feature activation | -3.93e+00 |
Pos logit contributions | |
anut | +0.194 |
i | +0.189 |
impressed | +0.183 |
bled | +0.177 |
bly | +0.176 |
Neg logit contributions | |
There | -0.171 |
This | -0.169 |
Circ | -0.165 |
To | -0.161 |
Where | -0.161 |
Token | twins |
Token ID | 20345 |
Feature activation | -2.40e+00 |
Pos logit contributions | |
anut | +0.107 |
ushed | +0.107 |
bly | +0.106 |
i | +0.105 |
ising | +0.105 |
Neg logit contributions | |
to | -0.106 |
There | -0.101 |
This | -0.098 |
| -0.095 |
university | -0.094 |
Token | . |
Token ID | 13 |
Feature activation | +3.86e+00 |
Pos logit contributions | |
anut | +0.088 |
i | +0.086 |
impressed | +0.084 |
bly | +0.082 |
bled | +0.082 |
Neg logit contributions | |
There | -0.040 |
This | -0.038 |
Circ | -0.037 |
Across | -0.036 |
Where | -0.036 |
Token | They |
Token ID | 1119 |
Feature activation | -3.25e+00 |
Pos logit contributions | |
ride | +0.073 |
Circ | +0.073 |
There | +0.073 |
university | +0.073 |
This | +0.071 |
Neg logit contributions | |
anut | -0.123 |
i | -0.120 |
impressed | -0.117 |
glad | -0.115 |
bly | -0.114 |
Token | like |
Token ID | 588 |
Feature activation | -4.90e+00 |
Pos logit contributions | |
anut | +0.093 |
bled | +0.089 |
i | +0.088 |
irling | +0.087 |
impressed | +0.087 |
Neg logit contributions | |
There | -0.081 |
ride | -0.080 |
This | -0.080 |
visit | -0.076 |
On | -0.075 |
Token | to |
Token ID | 284 |
Feature activation | +6.87e+00 |
Pos logit contributions | |
careful | +0.145 |
happy | +0.143 |
glad | +0.140 |
caring | +0.139 |
grateful | +0.138 |
Neg logit contributions | |
growth | -0.111 |
ork | -0.110 |
Across | -0.105 |
nob | -0.100 |
There | -0.100 |
Token | play |
Token ID | 711 |
Feature activation | +5.87e+00 |
Pos logit contributions | |
There | +0.110 |
Circ | +0.107 |
Where | +0.104 |
This | +0.103 |
Across | +0.102 |
Neg logit contributions | |
anut | -0.246 |
i | -0.243 |
impressed | -0.232 |
gigg | -0.232 |
bly | -0.231 |
Token | the |
Token ID | 262 |
Feature activation | +5.37e+00 |
Pos logit contributions | |
anut | +0.225 |
i | +0.215 |
impressed | +0.212 |
years | +0.208 |
bled | +0.207 |
Neg logit contributions | |
There | -0.165 |
This | -0.161 |
ride | -0.156 |
Circ | -0.154 |
Where | -0.151 |
Token | walls |
Token ID | 7714 |
Feature activation | +7.81e+00 |
Pos logit contributions | |
There | +0.136 |
This | +0.134 |
Circ | +0.130 |
Across | +0.128 |
On | +0.128 |
Neg logit contributions | |
anut | -0.149 |
i | -0.145 |
impressed | -0.141 |
bly | -0.137 |
careful | -0.136 |
Token | . |
Token ID | 13 |
Feature activation | +4.72e+00 |
Pos logit contributions | |
There | +0.144 |
This | +0.140 |
Circ | +0.136 |
Where | +0.132 |
To | +0.132 |
Neg logit contributions | |
anut | -0.269 |
i | -0.263 |
impressed | -0.258 |
bly | -0.253 |
bled | -0.252 |
Token | They |
Token ID | 1119 |
Feature activation | -2.32e+00 |
Pos logit contributions | |
ride | +0.097 |
hike | +0.093 |
Circ | +0.092 |
shortcut | +0.089 |
exit | +0.089 |
Neg logit contributions | |
glad | -0.140 |
impressed | -0.139 |
anut | -0.139 |
careful | -0.134 |
i | -0.133 |
Token | laugh |
Token ID | 6487 |
Feature activation | -5.81e+00 |
Pos logit contributions | |
anut | +0.071 |
i | +0.068 |
impressed | +0.068 |
bled | +0.067 |
bly | +0.066 |
Neg logit contributions | |
There | -0.052 |
This | -0.050 |
ride | -0.049 |
Circ | -0.049 |
To | -0.048 |
Token | and |
Token ID | 290 |
Feature activation | -7.13e+00 |
Pos logit contributions | |
anut | +0.179 |
i | +0.168 |
years | +0.164 |
bled | +0.161 |
irling | +0.160 |
Neg logit contributions | |
There | -0.140 |
This | -0.138 |
to | -0.130 |
Circ | -0.129 |
To | -0.129 |
Token | make |
Token ID | 787 |
Feature activation | +4.04e+00 |
Pos logit contributions | |
anut | +0.170 |
i | +0.164 |
impressed | +0.160 |
bly | +0.154 |
bled | +0.153 |
Neg logit contributions | |
There | -0.208 |
This | -0.204 |
Circ | -0.200 |
Across | -0.196 |
Where | -0.196 |
Token | bubbles |
Token ID | 25037 |
Feature activation | +3.92e+00 |
Pos logit contributions | |
This | +0.085 |
There | +0.083 |
ride | +0.082 |
to | +0.082 |
Circ | +0.082 |
Neg logit contributions | |
anut | -0.127 |
years | -0.121 |
i | -0.120 |
bled | -0.118 |
ushed | -0.118 |
Token | . |
Token ID | 13 |
Feature activation | +4.77e+00 |
Pos logit contributions | |
There | +0.073 |
This | +0.073 |
ride | +0.070 |
Circ | +0.069 |
To | +0.067 |
Neg logit contributions | |
anut | -0.133 |
i | -0.128 |
years | -0.127 |
impressed | -0.126 |
bled | -0.124 |
Token |
|
Token ID | 198 |
Feature activation | +4.22e+00 |
Pos logit contributions | |
ride | +0.100 |
hike | +0.096 |
Circ | +0.094 |
shortcut | +0.094 |
exit | +0.094 |
Neg logit contributions | |
glad | -0.140 |
impressed | -0.139 |
anut | -0.136 |
careful | -0.134 |
i | -0.131 |
Token |
|
Token ID | 198 |
Feature activation | +4.38e+00 |
Pos logit contributions | |
There | +0.059 |
This | +0.055 |
Circ | +0.055 |
Across | +0.055 |
ride | +0.054 |
Neg logit contributions | |
anut | -0.162 |
impressed | -0.160 |
glad | -0.155 |
careful | -0.154 |
irling | -0.153 |
Token | we |
Token ID | 356 |
Feature activation | +4.33e+00 |
Pos logit contributions | |
glad | +0.201 |
anut | +0.201 |
careful | +0.201 |
i | +0.197 |
impressed | +0.189 |
Neg logit contributions | |
Circ | -0.170 |
On | -0.169 |
Across | -0.169 |
ork | -0.166 |
This | -0.166 |
Token | saw |
Token ID | 2497 |
Feature activation | +3.23e+00 |
Pos logit contributions | |
There | +0.093 |
ride | +0.087 |
This | +0.085 |
Where | +0.083 |
visit | +0.081 |
Neg logit contributions | |
anut | -0.142 |
impressed | -0.137 |
irling | -0.136 |
bled | -0.135 |
i | -0.135 |
Token | a |
Token ID | 257 |
Feature activation | +2.20e+00 |
Pos logit contributions | |
There | +0.081 |
Across | +0.081 |
On | +0.080 |
Circ | +0.079 |
This | +0.078 |
Neg logit contributions | |
anut | -0.087 |
i | -0.086 |
impressed | -0.082 |
glad | -0.081 |
careful | -0.081 |
Token | wolf |
Token ID | 17481 |
Feature activation | +1.30e+00 |
Pos logit contributions | |
This | +0.054 |
There | +0.053 |
On | +0.050 |
Across | +0.049 |
growth | +0.049 |
Neg logit contributions | |
impressed | -0.063 |
anut | -0.062 |
glad | -0.062 |
careful | -0.060 |
pleased | -0.060 |
Token | !" |
Token ID | 2474 |
Feature activation | +4.55e+00 |
Pos logit contributions | |
Circ | +0.039 |
There | +0.039 |
This | +0.038 |
Across | +0.037 |
Where | +0.037 |
Neg logit contributions | |
i | -0.030 |
anut | -0.029 |
ising | -0.027 |
impressed | -0.027 |
bly | -0.027 |
Token | Tom |
Token ID | 4186 |
Feature activation | -5.05e+00 |
Pos logit contributions | |
There | +0.094 |
This | +0.091 |
Circ | +0.085 |
To | +0.085 |
Where | +0.085 |
Neg logit contributions | |
anut | -0.153 |
i | -0.148 |
impressed | -0.144 |
bled | -0.143 |
bly | -0.142 |
Token | said |
Token ID | 531 |
Feature activation | -4.53e+00 |
Pos logit contributions | |
bly | +0.143 |
bled | +0.141 |
irling | +0.141 |
anut | +0.139 |
years | +0.130 |
Neg logit contributions | |
to | -0.138 |
There | -0.138 |
This | -0.126 |
ride | -0.126 |
To | -0.126 |
Token | . |
Token ID | 13 |
Feature activation | +5.43e+00 |
Pos logit contributions | |
anut | +0.184 |
i | +0.176 |
irling | +0.173 |
years | +0.173 |
impressed | +0.173 |
Neg logit contributions | |
There | -0.062 |
This | -0.060 |
to | -0.058 |
To | -0.054 |
ride | -0.053 |
Token | " |
Token ID | 366 |
Feature activation | +4.52e+00 |
Pos logit contributions | |
ride | +0.101 |
hike | +0.099 |
university | +0.095 |
University | +0.093 |
library | +0.091 |
Neg logit contributions | |
impressed | -0.170 |
glad | -0.168 |
gigg | -0.164 |
thankful | -0.164 |
careful | -0.164 |
Token | He |
Token ID | 1544 |
Feature activation | -2.61e+00 |
Pos logit contributions | |
There | +0.104 |
This | +0.102 |
To | +0.098 |
Across | +0.096 |
On | +0.095 |
Neg logit contributions | |
anut | -0.135 |
impressed | -0.129 |
bly | -0.128 |
i | -0.127 |
bled | -0.125 |
Token | was |
Token ID | 373 |
Feature activation | -1.64e+00 |
Pos logit contributions | |
anut | +0.084 |
impressed | +0.081 |
i | +0.080 |
bled | +0.079 |
bly | +0.078 |
Neg logit contributions | |
There | -0.056 |
This | -0.054 |
ride | -0.051 |
Circ | -0.051 |
Where | -0.050 |
Token | pretty |
Token ID | 2495 |
Feature activation | -6.01e+00 |
Pos logit contributions | |
ishly | +0.382 |
anut | +0.372 |
i | +0.366 |
years | +0.359 |
bled | +0.358 |
Neg logit contributions | |
to | -0.258 |
ride | -0.255 |
There | -0.254 |
On | -0.252 |
This | -0.246 |
Token | . |
Token ID | 13 |
Feature activation | +4.64e+00 |
Pos logit contributions | |
anut | +0.224 |
i | +0.216 |
impressed | +0.215 |
bled | +0.210 |
bly | +0.210 |
Neg logit contributions | |
There | -0.096 |
This | -0.094 |
Circ | -0.088 |
ride | -0.087 |
To | -0.086 |
Token | |
Token ID | 220 |
Feature activation | -2.00e+00 |
Pos logit contributions | |
Circ | +0.140 |
ork | +0.135 |
growth | +0.133 |
shortcut | +0.130 |
hike | +0.128 |
Neg logit contributions | |
glad | -0.099 |
gigg | -0.095 |
then | -0.088 |
grateful | -0.087 |
impressed | -0.086 |
Token |
|
Token ID | 198 |
Feature activation | +4.00e+00 |
Pos logit contributions | |
glad | +0.062 |
impressed | +0.060 |
anut | +0.058 |
equally | +0.058 |
careful | +0.057 |
Neg logit contributions | |
growth | -0.042 |
Circ | -0.042 |
hike | -0.041 |
ride | -0.040 |
university | -0.038 |
Token |
|
Token ID | 198 |
Feature activation | +3.96e+00 |
Pos logit contributions | |
There | +0.069 |
growth | +0.069 |
Across | +0.068 |
Circ | +0.068 |
university | +0.063 |
Neg logit contributions | |
impressed | -0.130 |
glad | -0.128 |
anut | -0.126 |
i | -0.125 |
careful | -0.124 |
Token | But |
Token ID | 1537 |
Feature activation | -5.36e+00 |
Pos logit contributions | |
There | +0.109 |
growth | +0.108 |
Circ | +0.107 |
Where | +0.104 |
This | +0.102 |
Neg logit contributions | |
careful | -0.093 |
impressed | -0.090 |
irling | -0.087 |
glad | -0.087 |
equally | -0.086 |
Token | then |
Token ID | 788 |
Feature activation | -4.07e+00 |
Pos logit contributions | |
anut | +0.109 |
impressed | +0.100 |
i | +0.099 |
bly | +0.096 |
irling | +0.094 |
Neg logit contributions | |
There | -0.180 |
This | -0.177 |
ride | -0.171 |
To | -0.171 |
Across | -0.170 |
Token | she |
Token ID | 673 |
Feature activation | -2.46e+00 |
Pos logit contributions | |
anut | +0.102 |
i | +0.096 |
impressed | +0.095 |
bled | +0.092 |
bly | +0.092 |
Neg logit contributions | |
There | -0.116 |
This | -0.114 |
Circ | -0.111 |
Where | -0.109 |
To | -0.109 |
Token | started |
Token ID | 2067 |
Feature activation | +3.51e+00 |
Pos logit contributions | |
anut | +0.079 |
bly | +0.077 |
impressed | +0.076 |
bled | +0.075 |
i | +0.074 |
Neg logit contributions | |
There | -0.050 |
ride | -0.048 |
Across | -0.047 |
to | -0.046 |
This | -0.046 |
Token | to |
Token ID | 284 |
Feature activation | +7.68e+00 |
Pos logit contributions | |
There | +0.040 |
This | +0.036 |
Circ | +0.036 |
Across | +0.035 |
Where | +0.035 |
Neg logit contributions | |
gigg | -0.146 |
i | -0.144 |
impressed | -0.144 |
careful | -0.141 |
irling | -0.139 |
Token | feel |
Token ID | 1254 |
Feature activation | -4.59e+00 |
Pos logit contributions | |
There | +0.211 |
This | +0.207 |
Circ | +0.202 |
To | +0.198 |
Where | +0.198 |
Neg logit contributions | |
anut | -0.197 |
i | -0.190 |
impressed | -0.186 |
bly | -0.179 |
bled | -0.179 |
Token | of |
Token ID | 286 |
Feature activation | +6.61e+00 |
Pos logit contributions | |
There | +0.121 |
This | +0.120 |
Circ | +0.115 |
To | +0.113 |
Where | +0.112 |
Neg logit contributions | |
anut | -0.165 |
i | -0.161 |
impressed | -0.158 |
bled | -0.153 |
bly | -0.153 |
Token | his |
Token ID | 465 |
Feature activation | -3.89e+00 |
Pos logit contributions | |
There | +0.120 |
This | +0.117 |
Circ | +0.112 |
To | +0.109 |
Where | +0.109 |
Neg logit contributions | |
anut | -0.233 |
i | -0.226 |
impressed | -0.223 |
bled | -0.217 |
bly | -0.217 |
Token | feathers |
Token ID | 27400 |
Feature activation | +5.07e+00 |
Pos logit contributions | |
bly | +0.098 |
anut | +0.098 |
bled | +0.096 |
impressed | +0.095 |
years | +0.094 |
Neg logit contributions | |
ride | -0.105 |
There | -0.102 |
to | -0.101 |
visit | -0.099 |
This | -0.099 |
Token | fell |
Token ID | 3214 |
Feature activation | -2.07e+00 |
Pos logit contributions | |
There | +0.129 |
ride | +0.125 |
This | +0.125 |
to | +0.121 |
Across | +0.120 |
Neg logit contributions | |
anut | -0.143 |
i | -0.130 |
years | -0.130 |
impressed | -0.128 |
bly | -0.126 |
Token | out |
Token ID | 503 |
Feature activation | +6.47e+00 |
Pos logit contributions | |
anut | +0.067 |
i | +0.065 |
impressed | +0.064 |
bled | +0.063 |
bly | +0.063 |
Neg logit contributions | |
There | -0.042 |
This | -0.041 |
Circ | -0.040 |
To | -0.039 |
Where | -0.039 |
Token | and |
Token ID | 290 |
Feature activation | -6.88e+00 |
Pos logit contributions | |
There | +0.132 |
This | +0.129 |
Circ | +0.123 |
To | +0.122 |
ride | +0.121 |
Neg logit contributions | |
anut | -0.218 |
i | -0.207 |
impressed | -0.202 |
bled | -0.199 |
irling | -0.198 |
Token | he |
Token ID | 339 |
Feature activation | -1.12e+00 |
Pos logit contributions | |
anut | +0.201 |
i | +0.194 |
impressed | +0.191 |
bled | +0.185 |
bly | +0.185 |
Neg logit contributions | |
There | -0.165 |
This | -0.162 |
Circ | -0.157 |
To | -0.153 |
Where | -0.153 |
Token | realized |
Token ID | 6939 |
Feature activation | -9.07e+00 |
Pos logit contributions | |
anut | +0.033 |
bly | +0.032 |
impressed | +0.031 |
glad | +0.031 |
i | +0.031 |
Neg logit contributions | |
There | -0.025 |
ride | -0.025 |
to | -0.024 |
This | -0.024 |
ork | -0.024 |
Token | that |
Token ID | 326 |
Feature activation | +3.99e+00 |
Pos logit contributions | |
i | +0.259 |
impressed | +0.258 |
anut | +0.254 |
bled | +0.245 |
bly | +0.244 |
Neg logit contributions | |
Circ | -0.221 |
There | -0.218 |
This | -0.215 |
Across | -0.208 |
On | -0.205 |
Token | he |
Token ID | 339 |
Feature activation | -1.30e+00 |
Pos logit contributions | |
There | +0.086 |
This | +0.084 |
Circ | +0.082 |
Across | +0.081 |
To | +0.080 |
Neg logit contributions | |
anut | -0.124 |
impressed | -0.122 |
i | -0.122 |
careful | -0.116 |
glad | -0.115 |
Token | was |
Token ID | 373 |
Feature activation | -2.15e+00 |
Pos logit contributions | |
anut | +0.040 |
impressed | +0.038 |
bly | +0.038 |
pleased | +0.037 |
irling | +0.037 |
Neg logit contributions | |
There | -0.028 |
ride | -0.027 |
This | -0.027 |
to | -0.026 |
Where | -0.026 |
Token | It |
Token ID | 632 |
Feature activation | +4.70e+00 |
Pos logit contributions | |
hike | +0.108 |
Circ | +0.106 |
ride | +0.103 |
growth | +0.101 |
shortcut | +0.097 |
Neg logit contributions | |
impressed | -0.180 |
glad | -0.176 |
pleased | -0.171 |
thankful | -0.167 |
grateful | -0.165 |
Token | had |
Token ID | 550 |
Feature activation | +4.01e+00 |
Pos logit contributions | |
There | +0.104 |
This | +0.100 |
ride | +0.099 |
To | +0.098 |
Across | +0.097 |
Neg logit contributions | |
anut | -0.147 |
i | -0.138 |
irling | -0.137 |
bled | -0.136 |
years | -0.135 |
Token | made |
Token ID | 925 |
Feature activation | +4.59e+00 |
Pos logit contributions | |
There | +0.101 |
This | +0.099 |
Circ | +0.098 |
On | +0.097 |
Where | +0.096 |
Neg logit contributions | |
anut | -0.106 |
careful | -0.105 |
impressed | -0.104 |
i | -0.102 |
years | -0.099 |
Token | the |
Token ID | 262 |
Feature activation | +5.74e+00 |
Pos logit contributions | |
There | +0.097 |
This | +0.095 |
Circ | +0.091 |
To | +0.089 |
Where | +0.089 |
Neg logit contributions | |
anut | -0.147 |
i | -0.142 |
impressed | -0.140 |
bled | -0.136 |
bly | -0.136 |
Token | hose |
Token ID | 31489 |
Feature activation | +7.56e+00 |
Pos logit contributions | |
There | +0.098 |
This | +0.094 |
Circ | +0.090 |
ride | +0.089 |
Across | +0.088 |
Neg logit contributions | |
anut | -0.210 |
i | -0.205 |
impressed | -0.200 |
years | -0.197 |
bled | -0.197 |
Token | sm |
Token ID | 895 |
Feature activation | -4.01e+00 |
Pos logit contributions | |
This | +0.176 |
There | +0.175 |
ride | +0.170 |
to | +0.166 |
To | +0.163 |
Neg logit contributions | |
anut | -0.226 |
i | -0.213 |
years | -0.212 |
impressed | -0.209 |
bled | -0.209 |
Token | elly |
Token ID | 6148 |
Feature activation | -5.04e+00 |
Pos logit contributions | |
anut | +0.133 |
impressed | +0.131 |
i | +0.126 |
irling | +0.124 |
pleased | +0.124 |
Neg logit contributions | |
There | -0.082 |
This | -0.080 |
Circ | -0.077 |
Where | -0.075 |
On | -0.074 |
Token | . |
Token ID | 13 |
Feature activation | +5.61e+00 |
Pos logit contributions | |
anut | +0.155 |
i | +0.146 |
impressed | +0.145 |
bled | +0.143 |
years | +0.142 |
Neg logit contributions | |
There | -0.115 |
This | -0.114 |
Circ | -0.108 |
ride | -0.108 |
Where | -0.107 |
Token |
|
Token ID | 198 |
Feature activation | +4.77e+00 |
Pos logit contributions | |
hike | +0.114 |
Circ | +0.114 |
ride | +0.110 |
growth | +0.105 |
Across | +0.104 |
Neg logit contributions | |
impressed | -0.175 |
glad | -0.172 |
pleased | -0.166 |
thankful | -0.162 |
anut | -0.162 |
Token |
|
Token ID | 198 |
Feature activation | +4.84e+00 |
Pos logit contributions | |
Circ | +0.074 |
There | +0.072 |
Across | +0.072 |
ride | +0.071 |
hike | +0.068 |
Neg logit contributions | |
impressed | -0.174 |
glad | -0.169 |
anut | -0.167 |
pleased | -0.162 |
careful | -0.160 |
Token | Sarah |
Token ID | 29284 |
Feature activation | -5.67e+00 |
Pos logit contributions | |
There | +0.142 |
This | +0.138 |
Circ | +0.136 |
ride | +0.132 |
Across | +0.132 |
Neg logit contributions | |
impressed | -0.115 |
anut | -0.112 |
careful | -0.107 |
years | -0.107 |
bly | -0.106 |
Token | smiled |
Token ID | 13541 |
Feature activation | -1.43e+01 |
Pos logit contributions | |
anut | +0.222 |
impressed | +0.206 |
bly | +0.204 |
bled | +0.204 |
i | +0.204 |
Neg logit contributions | |
There | -0.242 |
This | -0.240 |
ride | -0.235 |
To | -0.230 |
Where | -0.226 |
Token | . |
Token ID | 13 |
Feature activation | +7.46e-01 |
Pos logit contributions | |
anut | +0.470 |
i | +0.433 |
irling | +0.426 |
years | +0.426 |
bly | +0.424 |
Neg logit contributions | |
There | -0.308 |
This | -0.304 |
to | -0.302 |
ride | -0.291 |
To | -0.281 |
Token | He |
Token ID | 679 |
Feature activation | -6.98e+00 |
Pos logit contributions | |
Circ | +0.016 |
hike | +0.016 |
growth | +0.016 |
ork | +0.016 |
shortcut | +0.015 |
Neg logit contributions | |
impressed | -0.022 |
glad | -0.022 |
pleased | -0.021 |
thankful | -0.021 |
grateful | -0.021 |
Token | felt |
Token ID | 2936 |
Feature activation | -7.64e+00 |
Pos logit contributions | |
anut | +0.188 |
i | +0.174 |
bly | +0.171 |
bled | +0.169 |
years | +0.167 |
Neg logit contributions | |
There | -0.186 |
This | -0.181 |
ride | -0.180 |
To | -0.177 |
Circ | -0.174 |
Token | happy |
Token ID | 3772 |
Feature activation | -1.48e+01 |
Pos logit contributions | |
anut | +0.163 |
i | +0.155 |
maid | +0.136 |
ushed | +0.133 |
ies | +0.133 |
Neg logit contributions | |
There | -0.260 |
This | -0.259 |
to | -0.252 |
To | -0.249 |
Where | -0.248 |
Token | knowing |
Token ID | 6970 |
Feature activation | -1.33e+01 |
Pos logit contributions | |
anut | +0.464 |
i | +0.448 |
impressed | +0.440 |
bled | +0.429 |
bly | +0.429 |
Neg logit contributions | |
There | -0.324 |
This | -0.317 |
Circ | -0.305 |
To | -0.299 |
Where | -0.298 |
Token | that |
Token ID | 326 |
Feature activation | +1.16e+00 |
Pos logit contributions | |
impressed | +0.388 |
anut | +0.383 |
i | +0.379 |
glad | +0.371 |
careful | +0.368 |
Neg logit contributions | |
There | -0.308 |
Circ | -0.303 |
This | -0.299 |
Across | -0.294 |
Where | -0.287 |
Token | his |
Token ID | 465 |
Feature activation | -6.19e+00 |
Pos logit contributions | |
There | +0.029 |
Circ | +0.028 |
Across | +0.028 |
growth | +0.028 |
This | +0.028 |
Neg logit contributions | |
impressed | -0.031 |
careful | -0.030 |
glad | -0.030 |
i | -0.029 |
pleased | -0.029 |
Token | mom |
Token ID | 1995 |
Feature activation | -5.92e+00 |
Pos logit contributions | |
anut | +0.159 |
i | +0.153 |
impressed | +0.150 |
bled | +0.144 |
bly | +0.143 |
Neg logit contributions | |
There | -0.170 |
This | -0.167 |
Circ | -0.163 |
Across | -0.159 |
Where | -0.159 |
Token | still |
Token ID | 991 |
Feature activation | -9.26e+00 |
Pos logit contributions | |
anut | +0.166 |
i | +0.159 |
impressed | +0.157 |
bled | +0.153 |
bly | +0.153 |
Neg logit contributions | |
There | -0.148 |
This | -0.145 |
Circ | -0.140 |
ride | -0.139 |
To | -0.138 |
Token | loved |
Token ID | 6151 |
Feature activation | -6.21e+00 |
Pos logit contributions | |
anut | +0.242 |
i | +0.228 |
impressed | +0.224 |
bled | +0.220 |
bly | +0.219 |
Neg logit contributions | |
There | -0.253 |
This | -0.249 |
ride | -0.240 |
Circ | -0.238 |
Where | -0.237 |
Token | . |
Token ID | 13 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
i | +0.131 |
impressed | +0.130 |
anut | +0.128 |
careful | +0.124 |
pleased | +0.123 |
Neg logit contributions | |
There | -0.095 |
Circ | -0.091 |
This | -0.089 |
Across | -0.089 |
Where | -0.087 |
Token | Everyone |
Token ID | 11075 |
Feature activation | +2.23e+00 |
Pos logit contributions | |
Circ | +0.021 |
ride | +0.020 |
ork | +0.020 |
shortcut | +0.020 |
hike | +0.020 |
Neg logit contributions | |
anut | -0.037 |
glad | -0.037 |
impressed | -0.036 |
i | -0.036 |
years | -0.035 |
Token | was |
Token ID | 373 |
Feature activation | -4.78e+00 |
Pos logit contributions | |
There | +0.062 |
This | +0.061 |
Circ | +0.059 |
To | +0.058 |
Where | +0.058 |
Neg logit contributions | |
anut | -0.056 |
i | -0.054 |
impressed | -0.053 |
bled | -0.051 |
bly | -0.051 |
Token | feeling |
Token ID | 4203 |
Feature activation | -4.79e+00 |
Pos logit contributions | |
anut | +0.099 |
i | +0.092 |
ushed | +0.090 |
mm | +0.085 |
bled | +0.084 |
Neg logit contributions | |
to | -0.160 |
ride | -0.154 |
university | -0.150 |
This | -0.150 |
There | -0.148 |
Token | so |
Token ID | 523 |
Feature activation | -1.14e+01 |
Pos logit contributions | |
anut | +0.110 |
i | +0.101 |
ushed | +0.091 |
bled | +0.091 |
irling | +0.087 |
Neg logit contributions | |
This | -0.156 |
There | -0.155 |
to | -0.154 |
ride | -0.148 |
Where | -0.148 |
Token | happy |
Token ID | 3772 |
Feature activation | -1.34e+01 |
Pos logit contributions | |
anut | +0.226 |
i | +0.211 |
bled | +0.188 |
years | +0.181 |
ishly | +0.177 |
Neg logit contributions | |
There | -0.401 |
This | -0.399 |
ride | -0.391 |
to | -0.389 |
university | -0.383 |
Token | . |
Token ID | 13 |
Feature activation | +2.99e+00 |
Pos logit contributions | |
anut | +0.390 |
i | +0.388 |
impressed | +0.381 |
bled | +0.363 |
bly | +0.362 |
Neg logit contributions | |
There | -0.313 |
This | -0.305 |
Circ | -0.304 |
Across | -0.297 |
Where | -0.293 |
Token |
|
Token ID | 198 |
Feature activation | +2.63e+00 |
Pos logit contributions | |
Circ | +0.050 |
ride | +0.050 |
hike | +0.047 |
ork | +0.046 |
Across | +0.046 |
Neg logit contributions | |
impressed | -0.100 |
anut | -0.100 |
glad | -0.098 |
i | -0.098 |
years | -0.096 |
Token |
|
Token ID | 198 |
Feature activation | +2.94e+00 |
Pos logit contributions | |
There | +0.037 |
This | +0.035 |
Circ | +0.034 |
ride | +0.034 |
Across | +0.033 |
Neg logit contributions | |
impressed | -0.101 |
anut | -0.100 |
glad | -0.097 |
irling | -0.096 |
i | -0.095 |
Token | Then |
Token ID | 6423 |
Feature activation | -5.01e+00 |
Pos logit contributions | |
There | +0.074 |
This | +0.072 |
Circ | +0.070 |
ride | +0.068 |
Across | +0.068 |
Neg logit contributions | |
anut | -0.082 |
impressed | -0.080 |
i | -0.078 |
bly | -0.077 |
irling | -0.076 |
Token | Daddy |
Token ID | 26926 |
Feature activation | -6.58e+00 |
Pos logit contributions | |
anut | +0.160 |
impressed | +0.150 |
i | +0.149 |
bled | +0.146 |
years | +0.146 |
Neg logit contributions | |
There | -0.112 |
This | -0.109 |
Where | -0.104 |
To | -0.103 |
ride | -0.103 |
Token | o |
Token ID | 78 |
Feature activation | -7.03e+00 |
Pos logit contributions | |
bly | +0.205 |
bled | +0.204 |
anut | +0.204 |
impressed | +0.201 |
irling | +0.196 |
Neg logit contributions | |
This | -0.187 |
There | -0.184 |
Circ | -0.180 |
ride | -0.179 |
to | -0.179 |
Token | and |
Token ID | 290 |
Feature activation | -8.71e+00 |
Pos logit contributions | |
anut | +0.188 |
bly | +0.181 |
bled | +0.176 |
i | +0.174 |
years | +0.173 |
Neg logit contributions | |
There | -0.181 |
This | -0.180 |
ride | -0.176 |
Circ | -0.174 |
To | -0.174 |
Token | Coco |
Token ID | 48222 |
Feature activation | -8.93e+00 |
Pos logit contributions | |
i | +0.237 |
anut | +0.229 |
gigg | +0.216 |
impressed | +0.213 |
grateful | +0.212 |
Neg logit contributions | |
There | -0.222 |
Across | -0.214 |
ork | -0.212 |
Circ | -0.212 |
This | -0.206 |
Token | were |
Token ID | 547 |
Feature activation | -4.47e+00 |
Pos logit contributions | |
anut | +0.232 |
i | +0.220 |
impressed | +0.217 |
bly | +0.215 |
bled | +0.214 |
Neg logit contributions | |
There | -0.240 |
This | -0.235 |
ride | -0.232 |
Circ | -0.231 |
To | -0.226 |
Token | very |
Token ID | 845 |
Feature activation | -1.30e+01 |
Pos logit contributions | |
ushed | +0.109 |
ising | +0.108 |
years | +0.107 |
anut | +0.106 |
ering | +0.105 |
Neg logit contributions | |
to | -0.131 |
This | -0.126 |
There | -0.120 |
across | -0.119 |
ride | -0.119 |
Token | happy |
Token ID | 3772 |
Feature activation | -1.34e+01 |
Pos logit contributions | |
ishly | +0.354 |
itive | +0.352 |
iously | +0.344 |
i | +0.343 |
eking | +0.334 |
Neg logit contributions | |
to | -0.360 |
ride | -0.331 |
visit | -0.323 |
This | -0.316 |
| -0.304 |
Token | . |
Token ID | 13 |
Feature activation | +3.36e+00 |
Pos logit contributions | |
anut | +0.447 |
i | +0.446 |
impressed | +0.441 |
careful | +0.424 |
glad | +0.422 |
Neg logit contributions | |
There | -0.254 |
This | -0.247 |
Circ | -0.244 |
Across | -0.241 |
Where | -0.232 |
Token | They |
Token ID | 1119 |
Feature activation | -3.64e+00 |
Pos logit contributions | |
Circ | +0.071 |
ride | +0.070 |
university | +0.068 |
Across | +0.068 |
There | +0.067 |
Neg logit contributions | |
anut | -0.100 |
i | -0.096 |
impressed | -0.095 |
glad | -0.094 |
grateful | -0.092 |
Token | played |
Token ID | 2826 |
Feature activation | +2.56e+00 |
Pos logit contributions | |
bly | +0.099 |
anut | +0.098 |
irling | +0.096 |
bled | +0.095 |
ising | +0.094 |
Neg logit contributions | |
ride | -0.093 |
There | -0.089 |
visit | -0.087 |
Under | -0.086 |
On | -0.086 |
Token | together |
Token ID | 1978 |
Feature activation | -3.32e+00 |
Pos logit contributions | |
There | +0.074 |
This | +0.073 |
Circ | +0.071 |
To | +0.071 |
ride | +0.070 |
Neg logit contributions | |
anut | -0.062 |
i | -0.060 |
impressed | -0.057 |
years | -0.056 |
bly | -0.055 |
Token | in |
Token ID | 287 |
Feature activation | +6.64e+00 |
Pos logit contributions | |
anut | +0.085 |
i | +0.082 |
impressed | +0.081 |
bled | +0.077 |
careful | +0.077 |
Neg logit contributions | |
There | -0.092 |
This | -0.089 |
Circ | -0.087 |
Where | -0.087 |
Across | -0.086 |
Token | tank |
Token ID | 6873 |
Feature activation | +2.85e+00 |
Pos logit contributions | |
i | +0.154 |
anut | +0.153 |
impressed | +0.149 |
careful | +0.146 |
maid | +0.145 |
Neg logit contributions | |
There | -0.110 |
Circ | -0.103 |
This | -0.102 |
Across | -0.102 |
On | -0.100 |
Token | , |
Token ID | 11 |
Feature activation | -1.11e+01 |
Pos logit contributions | |
Circ | +0.067 |
There | +0.066 |
This | +0.063 |
Where | +0.063 |
Across | +0.063 |
Neg logit contributions | |
anut | -0.083 |
i | -0.083 |
impressed | -0.080 |
careful | -0.077 |
bly | -0.077 |
Token | Fin |
Token ID | 4463 |
Feature activation | -8.00e+00 |
Pos logit contributions | |
anut | +0.300 |
i | +0.290 |
impressed | +0.287 |
bly | +0.274 |
bled | +0.274 |
Neg logit contributions | |
There | -0.285 |
This | -0.280 |
Circ | -0.276 |
Across | -0.268 |
Where | -0.268 |
Token | was |
Token ID | 373 |
Feature activation | -4.89e+00 |
Pos logit contributions | |
anut | +0.232 |
i | +0.223 |
impressed | +0.222 |
bly | +0.219 |
bled | +0.217 |
Neg logit contributions | |
There | -0.190 |
This | -0.188 |
ride | -0.179 |
Circ | -0.179 |
To | -0.177 |
Token | very |
Token ID | 845 |
Feature activation | -1.42e+01 |
Pos logit contributions | |
anut | +0.101 |
i | +0.097 |
bled | +0.089 |
ushed | +0.089 |
ising | +0.088 |
Neg logit contributions | |
There | -0.158 |
This | -0.156 |
ride | -0.152 |
To | -0.151 |
to | -0.150 |
Token | happy |
Token ID | 3772 |
Feature activation | -1.34e+01 |
Pos logit contributions | |
i | +0.294 |
anut | +0.275 |
bled | +0.268 |
maid | +0.262 |
itive | +0.259 |
Neg logit contributions | |
to | -0.463 |
This | -0.449 |
ride | -0.447 |
There | -0.443 |
Where | -0.421 |
Token | . |
Token ID | 13 |
Feature activation | +2.91e+00 |
Pos logit contributions | |
i | +0.388 |
impressed | +0.378 |
anut | +0.375 |
careful | +0.361 |
glad | +0.358 |
Neg logit contributions | |
There | -0.317 |
Circ | -0.314 |
Across | -0.307 |
This | -0.302 |
On | -0.295 |
Token | Lucy |
Token ID | 22162 |
Feature activation | -8.29e+00 |
Pos logit contributions | |
Circ | +0.055 |
ork | +0.054 |
university | +0.052 |
ride | +0.052 |
Across | +0.050 |
Neg logit contributions | |
anut | -0.093 |
glad | -0.091 |
i | -0.091 |
impressed | -0.091 |
grateful | -0.087 |
Token | felt |
Token ID | 2936 |
Feature activation | -5.76e+00 |
Pos logit contributions | |
bly | +0.249 |
anut | +0.249 |
bled | +0.243 |
years | +0.233 |
irling | +0.229 |
Neg logit contributions | |
ride | -0.190 |
There | -0.185 |
to | -0.183 |
This | -0.182 |
visit | -0.179 |
Token | like |
Token ID | 588 |
Feature activation | -5.17e+00 |
Pos logit contributions | |
anut | +0.119 |
i | +0.117 |
ushed | +0.105 |
maid | +0.104 |
ies | +0.104 |
Neg logit contributions | |
This | -0.203 |
There | -0.196 |
to | -0.196 |
Where | -0.191 |
To | -0.191 |
Token | she |
Token ID | 673 |
Feature activation | -2.99e+00 |
Pos logit contributions | |
anut | +0.162 |
i | +0.157 |
impressed | +0.154 |
bled | +0.150 |
bly | +0.150 |
Neg logit contributions | |
There | -0.112 |
This | -0.110 |
Circ | -0.106 |
To | -0.104 |
Where | -0.104 |
Token | noises |
Token ID | 26782 |
Feature activation | +3.85e+00 |
Pos logit contributions | |
i | +0.101 |
gigg | +0.094 |
impressed | +0.093 |
anut | +0.092 |
careful | +0.092 |
Neg logit contributions | |
Circ | -0.096 |
There | -0.095 |
This | -0.095 |
To | -0.093 |
On | -0.092 |
Token | . |
Token ID | 13 |
Feature activation | +5.53e+00 |
Pos logit contributions | |
Circ | +0.099 |
This | +0.097 |
There | +0.094 |
To | +0.093 |
Across | +0.093 |
Neg logit contributions | |
i | -0.103 |
impressed | -0.099 |
gigg | -0.098 |
bly | -0.097 |
careful | -0.096 |
Token | One |
Token ID | 1881 |
Feature activation | +7.19e-01 |
Pos logit contributions | |
Circ | +0.107 |
growth | +0.104 |
university | +0.103 |
ork | +0.101 |
aid | +0.096 |
Neg logit contributions | |
i | -0.174 |
anut | -0.169 |
impressed | -0.166 |
glad | -0.165 |
careful | -0.163 |
Token | day |
Token ID | 1110 |
Feature activation | +4.16e+00 |
Pos logit contributions | |
There | +0.014 |
This | +0.014 |
Circ | +0.013 |
To | +0.013 |
Where | +0.013 |
Neg logit contributions | |
anut | -0.024 |
i | -0.024 |
impressed | -0.023 |
bly | -0.023 |
bled | -0.023 |
Token | , |
Token ID | 11 |
Feature activation | -7.55e+00 |
Pos logit contributions | |
There | +0.114 |
This | +0.112 |
Where | +0.106 |
To | +0.106 |
ride | +0.106 |
Neg logit contributions | |
anut | -0.114 |
impressed | -0.105 |
i | -0.105 |
years | -0.101 |
bly | -0.099 |
Token | B |
Token ID | 347 |
Feature activation | -1.36e+01 |
Pos logit contributions | |
anut | +0.225 |
impressed | +0.204 |
i | +0.201 |
irling | +0.197 |
ushed | +0.196 |
Neg logit contributions | |
There | -0.200 |
This | -0.195 |
ride | -0.192 |
On | -0.188 |
to | -0.186 |
Token | aa |
Token ID | 7252 |
Feature activation | -4.88e+00 |
Pos logit contributions | |
anut | +0.311 |
i | +0.296 |
impressed | +0.291 |
bled | +0.279 |
bly | +0.279 |
Neg logit contributions | |
There | -0.411 |
This | -0.405 |
Circ | -0.394 |
ride | -0.387 |
To | -0.387 |
Token | wandered |
Token ID | 36036 |
Feature activation | +2.95e+00 |
Pos logit contributions | |
anut | +0.147 |
impressed | +0.142 |
bly | +0.137 |
irling | +0.137 |
years | +0.137 |
Neg logit contributions | |
There | -0.112 |
This | -0.110 |
ride | -0.107 |
Circ | -0.103 |
Where | -0.103 |
Token | into |
Token ID | 656 |
Feature activation | +6.85e+00 |
Pos logit contributions | |
There | +0.051 |
This | +0.050 |
to | +0.049 |
To | +0.048 |
On | +0.048 |
Neg logit contributions | |
anut | -0.108 |
i | -0.104 |
impressed | -0.102 |
ushed | -0.100 |
irling | -0.100 |
Token | a |
Token ID | 257 |
Feature activation | +1.83e+00 |
Pos logit contributions | |
There | +0.161 |
This | +0.157 |
Across | +0.152 |
Circ | +0.149 |
Where | +0.147 |
Neg logit contributions | |
careful | -0.189 |
i | -0.189 |
impressed | -0.187 |
anut | -0.185 |
gigg | -0.183 |
Token | new |
Token ID | 649 |
Feature activation | -2.22e+00 |
Pos logit contributions | |
There | +0.046 |
This | +0.046 |
Circ | +0.043 |
growth | +0.042 |
Across | +0.042 |
Neg logit contributions | |
impressed | -0.050 |
anut | -0.050 |
careful | -0.050 |
i | -0.048 |
irling | -0.047 |