Token |
|
Token ID | 198 |
Feature activation | +1.28e-02 |
Pos logit contributions | |
keeping | +0.056 |
outage | +0.055 |
leaf | +0.053 |
rated | +0.053 |
harm | +0.053 |
Neg logit contributions | |
nicely | -0.020 |
aws | -0.016 |
fair | -0.015 |
nice | -0.014 |
hockey | -0.014 |
Token | Ben |
Token ID | 11696 |
Feature activation | +3.97e-02 |
Pos logit contributions | |
keeping | +0.031 |
outage | +0.030 |
harm | +0.029 |
leaf | +0.029 |
fly | +0.029 |
Neg logit contributions | |
nicely | -0.012 |
fair | -0.011 |
aws | -0.010 |
uddle | -0.010 |
hockey | -0.009 |
Token | says |
Token ID | 1139 |
Feature activation | +4.44e-02 |
Pos logit contributions | |
keeping | +0.072 |
outage | +0.066 |
leaf | +0.063 |
harm | +0.063 |
fright | +0.061 |
Neg logit contributions | |
nicely | -0.061 |
fair | -0.061 |
uddle | -0.059 |
aws | -0.058 |
favorite | -0.057 |
Token | , |
Token ID | 11 |
Feature activation | -1.40e-01 |
Pos logit contributions | |
keeping | +0.103 |
fright | +0.102 |
fly | +0.100 |
leaf | +0.099 |
outage | +0.099 |
Neg logit contributions | |
uddle | -0.038 |
aws | -0.037 |
fair | -0.033 |
sandbox | -0.032 |
nicely | -0.032 |
Token | " |
Token ID | 366 |
Feature activation | -5.74e-02 |
Pos logit contributions | |
nicely | +0.084 |
fair | +0.078 |
uddle | +0.073 |
aws | +0.069 |
favorite | +0.066 |
Neg logit contributions | |
keeping | -0.377 |
outage | -0.360 |
leaf | -0.357 |
harm | -0.352 |
fright | -0.349 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.03e-01 |
Pos logit contributions | |
nicely | +0.085 |
fair | +0.082 |
uddle | +0.078 |
aws | +0.077 |
favorite | +0.076 |
Neg logit contributions | |
keeping | -0.107 |
outage | -0.099 |
harm | -0.096 |
Judy | -0.096 |
leaf | -0.095 |
Token | , |
Token ID | 11 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
keeping | +0.498 |
fright | +0.479 |
outage | +0.478 |
Judy | +0.475 |
harm | +0.470 |
Neg logit contributions | |
nicely | -0.147 |
uddle | -0.142 |
aws | -0.139 |
fair | -0.134 |
favorite | -0.128 |
Token | dad |
Token ID | 9955 |
Feature activation | +6.46e-02 |
Pos logit contributions | |
fair | +0.181 |
nicely | +0.179 |
nice | +0.158 |
favorite | +0.158 |
hockey | +0.152 |
Neg logit contributions | |
keeping | -0.323 |
outage | -0.291 |
leaf | -0.290 |
harm | -0.289 |
fly | -0.283 |
Token | . |
Token ID | 13 |
Feature activation | +9.12e-03 |
Pos logit contributions | |
keeping | +0.164 |
outage | +0.157 |
leaf | +0.156 |
fright | +0.156 |
eway | +0.153 |
Neg logit contributions | |
fair | -0.055 |
nicely | -0.054 |
nice | -0.045 |
aws | -0.042 |
favorite | -0.039 |
Token | But |
Token ID | 887 |
Feature activation | +7.72e-02 |
Pos logit contributions | |
keeping | +0.015 |
harm | +0.014 |
outage | +0.014 |
fright | +0.013 |
leaf | +0.013 |
Neg logit contributions | |
nicely | -0.015 |
fair | -0.015 |
favorite | -0.014 |
uddle | -0.014 |
aws | -0.014 |
Token | can |
Token ID | 460 |
Feature activation | +6.42e-02 |
Pos logit contributions | |
keeping | +0.135 |
leaf | +0.124 |
outage | +0.124 |
fright | +0.119 |
harm | +0.119 |
Neg logit contributions | |
nicely | -0.121 |
fair | -0.119 |
uddle | -0.114 |
aws | -0.112 |
favorite | -0.111 |
Token | . |
Token ID | 13 |
Feature activation | +2.01e-02 |
Pos logit contributions | |
keeping | +0.035 |
fright | +0.033 |
fly | +0.033 |
leaf | +0.033 |
Judy | +0.032 |
Neg logit contributions | |
uddle | -0.026 |
nicely | -0.024 |
aws | -0.024 |
gym | -0.023 |
fair | -0.023 |
Token | She |
Token ID | 1375 |
Feature activation | -2.47e-02 |
Pos logit contributions | |
keeping | +0.045 |
outage | +0.043 |
Judy | +0.043 |
leaf | +0.042 |
fly | +0.042 |
Neg logit contributions | |
uddle | -0.020 |
nicely | -0.019 |
fair | -0.019 |
aws | -0.018 |
favorite | -0.018 |
Token | says |
Token ID | 1139 |
Feature activation | +4.19e-02 |
Pos logit contributions | |
fair | +0.044 |
nicely | +0.044 |
uddle | +0.043 |
aws | +0.042 |
favorite | +0.041 |
Neg logit contributions | |
keeping | -0.041 |
outage | -0.036 |
fright | -0.034 |
harm | -0.034 |
eway | -0.034 |
Token | , |
Token ID | 11 |
Feature activation | -1.40e-01 |
Pos logit contributions | |
keeping | +0.091 |
Judy | +0.090 |
leaf | +0.089 |
Maria | +0.089 |
outage | +0.088 |
Neg logit contributions | |
uddle | -0.041 |
aws | -0.038 |
nicely | -0.036 |
favorite | -0.035 |
fair | -0.035 |
Token | " |
Token ID | 366 |
Feature activation | -5.92e-02 |
Pos logit contributions | |
uddle | +0.120 |
aws | +0.116 |
fair | +0.112 |
nicely | +0.110 |
favorite | +0.103 |
Neg logit contributions | |
keeping | -0.329 |
fly | -0.316 |
outage | -0.314 |
leaf | -0.314 |
Judy | -0.314 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.03e-01 |
Pos logit contributions | |
nicely | +0.081 |
fair | +0.080 |
aws | +0.075 |
uddle | +0.071 |
nice | +0.071 |
Neg logit contributions | |
keeping | -0.116 |
outage | -0.109 |
harm | -0.108 |
fright | -0.105 |
fly | -0.105 |
Token | , |
Token ID | 11 |
Feature activation | -1.44e-01 |
Pos logit contributions | |
keeping | +0.504 |
outage | +0.483 |
fright | +0.483 |
Judy | +0.480 |
leaf | +0.479 |
Neg logit contributions | |
nicely | -0.141 |
uddle | -0.138 |
aws | -0.133 |
fair | -0.130 |
favorite | -0.124 |
Token | you |
Token ID | 345 |
Feature activation | +6.96e-02 |
Pos logit contributions | |
fair | +0.205 |
nicely | +0.200 |
nice | +0.182 |
aws | +0.182 |
favorite | +0.182 |
Neg logit contributions | |
keeping | -0.291 |
outage | -0.265 |
fly | -0.260 |
harm | -0.259 |
leaf | -0.259 |
Token | can |
Token ID | 460 |
Feature activation | +6.65e-02 |
Pos logit contributions | |
keeping | +0.141 |
outage | +0.125 |
fly | +0.125 |
leaf | +0.122 |
harm | +0.121 |
Neg logit contributions | |
fair | -0.106 |
nicely | -0.105 |
nice | -0.097 |
uddle | -0.094 |
favorite | -0.093 |
Token | play |
Token ID | 711 |
Feature activation | -6.23e-02 |
Pos logit contributions | |
keeping | +0.116 |
outage | +0.109 |
leaf | +0.108 |
fright | +0.107 |
harm | +0.107 |
Neg logit contributions | |
nicely | -0.098 |
uddle | -0.096 |
fair | -0.096 |
aws | -0.095 |
favorite | -0.091 |
Token | with |
Token ID | 351 |
Feature activation | +1.57e-02 |
Pos logit contributions | |
uddle | +0.072 |
aws | +0.071 |
cart | +0.068 |
udd | +0.067 |
Days | +0.066 |
Neg logit contributions | |
keeping | -0.120 |
e | -0.116 |
fright | -0.116 |
leaf | -0.112 |
harm | -0.111 |
Token | . |
Token ID | 13 |
Feature activation | +2.04e-02 |
Pos logit contributions | |
nicely | +0.021 |
fair | +0.021 |
uddle | +0.021 |
aws | +0.020 |
favorite | +0.019 |
Neg logit contributions | |
keeping | -0.039 |
outage | -0.036 |
leaf | -0.036 |
fright | -0.035 |
harm | -0.035 |
Token | He |
Token ID | 679 |
Feature activation | -3.65e-02 |
Pos logit contributions | |
keeping | +0.044 |
outage | +0.042 |
Judy | +0.041 |
fly | +0.041 |
leaf | +0.041 |
Neg logit contributions | |
uddle | -0.022 |
nicely | -0.022 |
fair | -0.021 |
aws | -0.021 |
favorite | -0.020 |
Token | says |
Token ID | 1139 |
Feature activation | +4.24e-02 |
Pos logit contributions | |
nicely | +0.060 |
fair | +0.059 |
uddle | +0.059 |
aws | +0.058 |
favorite | +0.057 |
Neg logit contributions | |
keeping | -0.063 |
outage | -0.057 |
leaf | -0.054 |
harm | -0.054 |
fly | -0.053 |
Token | , |
Token ID | 11 |
Feature activation | -1.41e-01 |
Pos logit contributions | |
ull | +0.085 |
Judy | +0.083 |
fright | +0.082 |
keeping | +0.081 |
fly | +0.081 |
Neg logit contributions | |
uddle | -0.051 |
aws | -0.048 |
udd | -0.045 |
Days | -0.044 |
nicely | -0.042 |
Token | " |
Token ID | 366 |
Feature activation | -6.10e-02 |
Pos logit contributions | |
uddle | +0.141 |
aws | +0.137 |
fair | +0.127 |
nicely | +0.124 |
udd | +0.121 |
Neg logit contributions | |
keeping | -0.303 |
fly | -0.302 |
Judy | -0.296 |
outage | -0.295 |
fright | -0.294 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
nicely | +0.080 |
fair | +0.079 |
aws | +0.074 |
uddle | +0.072 |
favorite | +0.070 |
Neg logit contributions | |
keeping | -0.122 |
outage | -0.118 |
leaf | -0.112 |
harm | -0.111 |
fright | -0.111 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
keeping | +0.485 |
fright | +0.473 |
outage | +0.470 |
Judy | +0.468 |
Maria | +0.463 |
Neg logit contributions | |
nicely | -0.149 |
uddle | -0.146 |
aws | -0.143 |
fair | -0.136 |
favorite | -0.130 |
Token | mom |
Token ID | 1995 |
Feature activation | +9.13e-02 |
Pos logit contributions | |
fair | +0.194 |
nicely | +0.193 |
favorite | +0.174 |
cart | +0.173 |
nice | +0.172 |
Neg logit contributions | |
keeping | -0.318 |
leaf | -0.283 |
outage | -0.282 |
harm | -0.277 |
fly | -0.273 |
Token | . |
Token ID | 13 |
Feature activation | +6.98e-03 |
Pos logit contributions | |
keeping | +0.220 |
leaf | +0.215 |
outage | +0.209 |
fright | +0.209 |
eway | +0.204 |
Neg logit contributions | |
fair | -0.085 |
nicely | -0.083 |
aws | -0.074 |
nice | -0.071 |
uddle | -0.070 |
Token | I |
Token ID | 314 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
keeping | +0.013 |
harm | +0.012 |
leaf | +0.012 |
outage | +0.012 |
fright | +0.012 |
Neg logit contributions | |
nicely | -0.011 |
fair | -0.010 |
favorite | -0.010 |
uddle | -0.010 |
cart | -0.009 |
Token | deserve |
Token ID | 10925 |
Feature activation | +6.30e-03 |
Pos logit contributions | |
keeping | +0.078 |
Maria | +0.067 |
Judy | +0.067 |
outage | +0.067 |
eway | +0.066 |
Neg logit contributions | |
nicely | -0.067 |
fair | -0.066 |
favorite | -0.063 |
nice | -0.062 |
uddle | -0.062 |
Token | . |
Token ID | 13 |
Feature activation | +1.58e-02 |
Pos logit contributions | |
nicely | +0.029 |
fair | +0.028 |
favorite | +0.025 |
aws | +0.025 |
uddle | +0.025 |
Neg logit contributions | |
keeping | -0.061 |
outage | -0.058 |
leaf | -0.057 |
harm | -0.055 |
fright | -0.055 |
Token | She |
Token ID | 1375 |
Feature activation | -2.44e-02 |
Pos logit contributions | |
keeping | +0.036 |
outage | +0.034 |
leaf | +0.033 |
Judy | +0.033 |
fly | +0.033 |
Neg logit contributions | |
uddle | -0.015 |
nicely | -0.015 |
fair | -0.015 |
aws | -0.015 |
favorite | -0.014 |
Token | says |
Token ID | 1139 |
Feature activation | +4.26e-02 |
Pos logit contributions | |
fair | +0.043 |
nicely | +0.043 |
uddle | +0.042 |
aws | +0.042 |
favorite | +0.041 |
Neg logit contributions | |
keeping | -0.040 |
outage | -0.036 |
harm | -0.033 |
leaf | -0.033 |
eway | -0.033 |
Token | , |
Token ID | 11 |
Feature activation | -1.40e-01 |
Pos logit contributions | |
keeping | +0.090 |
Judy | +0.090 |
fright | +0.088 |
Maria | +0.087 |
outage | +0.087 |
Neg logit contributions | |
uddle | -0.045 |
aws | -0.041 |
nicely | -0.039 |
Days | -0.038 |
udd | -0.038 |
Token | " |
Token ID | 366 |
Feature activation | -6.04e-02 |
Pos logit contributions | |
uddle | +0.111 |
aws | +0.106 |
nicely | +0.104 |
fair | +0.104 |
favorite | +0.094 |
Neg logit contributions | |
keeping | -0.335 |
outage | -0.322 |
fly | -0.319 |
leaf | -0.319 |
Judy | -0.318 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
nicely | +0.077 |
fair | +0.075 |
aws | +0.070 |
uddle | +0.068 |
favorite | +0.065 |
Neg logit contributions | |
keeping | -0.123 |
outage | -0.119 |
harm | -0.114 |
fly | -0.113 |
leaf | -0.112 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
keeping | +0.486 |
fright | +0.476 |
outage | +0.472 |
Judy | +0.471 |
Maria | +0.466 |
Neg logit contributions | |
nicely | -0.147 |
uddle | -0.141 |
aws | -0.139 |
fair | -0.134 |
favorite | -0.128 |
Token | Ben |
Token ID | 3932 |
Feature activation | +1.89e-02 |
Pos logit contributions | |
nicely | +0.173 |
fair | +0.171 |
favorite | +0.153 |
cart | +0.149 |
nice | +0.148 |
Neg logit contributions | |
keeping | -0.337 |
outage | -0.302 |
leaf | -0.301 |
harm | -0.297 |
eway | -0.294 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.051 |
leaf | +0.051 |
outage | +0.050 |
eway | +0.048 |
fright | +0.047 |
Neg logit contributions | |
fair | -0.013 |
nicely | -0.012 |
nice | -0.011 |
aws | -0.010 |
uddle | -0.010 |
Token | I |
Token ID | 314 |
Feature activation | +4.08e-02 |
Pos logit contributions | |
fair | +0.229 |
nicely | +0.228 |
favorite | +0.207 |
nice | +0.205 |
cart | +0.202 |
Neg logit contributions | |
keeping | -0.279 |
outage | -0.249 |
leaf | -0.244 |
jo | -0.241 |
harm | -0.239 |
Token | want |
Token ID | 765 |
Feature activation | +4.64e-02 |
Pos logit contributions | |
keeping | +0.073 |
leaf | +0.063 |
outage | +0.063 |
Maria | +0.062 |
Judy | +0.062 |
Neg logit contributions | |
nicely | -0.069 |
fair | -0.068 |
uddle | -0.065 |
favorite | -0.064 |
nice | -0.063 |
Token | says |
Token ID | 1139 |
Feature activation | +5.47e-02 |
Pos logit contributions | |
keeping | +0.020 |
Judy | +0.019 |
leaf | +0.019 |
fright | +0.018 |
outage | +0.018 |
Neg logit contributions | |
uddle | -0.017 |
fair | -0.017 |
nicely | -0.017 |
favorite | -0.016 |
aws | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | +2.18e-02 |
Pos logit contributions | |
keeping | +0.139 |
outage | +0.131 |
leaf | +0.130 |
harm | +0.127 |
icated | +0.125 |
Neg logit contributions | |
nicely | -0.050 |
fair | -0.047 |
aws | -0.042 |
favorite | -0.041 |
nice | -0.040 |
Token |
|
Token ID | 198 |
Feature activation | +1.26e-02 |
Pos logit contributions | |
keeping | +0.052 |
harm | +0.049 |
leaf | +0.049 |
outage | +0.049 |
fright | +0.047 |
Neg logit contributions | |
nicely | -0.022 |
fair | -0.019 |
aws | -0.019 |
uddle | -0.018 |
cart | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | +8.66e-03 |
Pos logit contributions | |
keeping | +0.033 |
outage | +0.031 |
leaf | +0.031 |
rated | +0.031 |
fly | +0.031 |
Neg logit contributions | |
nicely | -0.011 |
aws | -0.010 |
fair | -0.009 |
nice | -0.008 |
uddle | -0.008 |
Token | " |
Token ID | 1 |
Feature activation | -6.76e-02 |
Pos logit contributions | |
keeping | +0.021 |
outage | +0.020 |
leaf | +0.020 |
harm | +0.019 |
fly | +0.019 |
Neg logit contributions | |
nicely | -0.008 |
fair | -0.007 |
aws | -0.007 |
uddle | -0.007 |
nice | -0.006 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
nicely | +0.097 |
uddle | +0.096 |
fair | +0.095 |
aws | +0.092 |
favorite | +0.090 |
Neg logit contributions | |
keeping | -0.123 |
leaf | -0.116 |
outage | -0.115 |
fright | -0.112 |
fly | -0.112 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.567 |
leaf | +0.538 |
outage | +0.536 |
fly | +0.524 |
harm | +0.523 |
Neg logit contributions | |
nicely | -0.111 |
fair | -0.107 |
uddle | -0.098 |
aws | -0.091 |
favorite | -0.084 |
Token | they |
Token ID | 484 |
Feature activation | -6.60e-02 |
Pos logit contributions | |
fair | +0.186 |
nicely | +0.178 |
nice | +0.163 |
favorite | +0.158 |
cart | +0.156 |
Neg logit contributions | |
keeping | -0.338 |
outage | -0.303 |
rated | -0.301 |
harm | -0.300 |
leaf | -0.297 |
Token | are |
Token ID | 389 |
Feature activation | -3.24e-02 |
Pos logit contributions | |
fair | +0.108 |
nicely | +0.105 |
uddle | +0.102 |
aws | +0.101 |
favorite | +0.099 |
Neg logit contributions | |
keeping | -0.119 |
outage | -0.108 |
fly | -0.105 |
leaf | -0.104 |
eway | -0.102 |
Token | !" |
Token ID | 2474 |
Feature activation | +6.72e-02 |
Pos logit contributions | |
nicely | +0.044 |
fair | +0.043 |
uddle | +0.041 |
aws | +0.041 |
favorite | +0.040 |
Neg logit contributions | |
keeping | -0.063 |
outage | -0.059 |
leaf | -0.059 |
fright | -0.057 |
harm | -0.057 |
Token | Ben |
Token ID | 3932 |
Feature activation | +2.18e-02 |
Pos logit contributions | |
keeping | +0.166 |
outage | +0.160 |
rated | +0.157 |
harm | +0.153 |
leaf | +0.153 |
Neg logit contributions | |
nicely | -0.067 |
fair | -0.062 |
aws | -0.057 |
cart | -0.055 |
favorite | -0.053 |
Token | says |
Token ID | 1139 |
Feature activation | +4.70e-02 |
Pos logit contributions | |
keeping | +0.184 |
outage | +0.161 |
eway | +0.160 |
leaf | +0.157 |
tal | +0.154 |
Neg logit contributions | |
nicely | -0.126 |
fair | -0.122 |
p | -0.113 |
nice | -0.111 |
at | -0.110 |
Token | . |
Token ID | 13 |
Feature activation | +2.25e-02 |
Pos logit contributions | |
keeping | +0.104 |
outage | +0.098 |
leaf | +0.097 |
harm | +0.094 |
fright | +0.094 |
Neg logit contributions | |
nicely | -0.054 |
fair | -0.050 |
uddle | -0.047 |
aws | -0.046 |
favorite | -0.046 |
Token |
|
Token ID | 198 |
Feature activation | +1.07e-02 |
Pos logit contributions | |
keeping | +0.053 |
outage | +0.050 |
leaf | +0.050 |
harm | +0.050 |
fright | +0.049 |
Neg logit contributions | |
nicely | -0.021 |
fair | -0.020 |
aws | -0.019 |
uddle | -0.019 |
cart | -0.018 |
Token |
|
Token ID | 198 |
Feature activation | +7.88e-03 |
Pos logit contributions | |
keeping | +0.026 |
outage | +0.025 |
leaf | +0.025 |
fly | +0.025 |
rated | +0.025 |
Neg logit contributions | |
nicely | -0.010 |
aws | -0.009 |
fair | -0.008 |
nice | -0.008 |
uddle | -0.007 |
Token | " |
Token ID | 1 |
Feature activation | -6.93e-02 |
Pos logit contributions | |
keeping | +0.019 |
outage | +0.018 |
leaf | +0.018 |
harm | +0.017 |
fly | +0.017 |
Neg logit contributions | |
nicely | -0.007 |
fair | -0.007 |
aws | -0.007 |
uddle | -0.007 |
cart | -0.006 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
uddle | +0.115 |
nicely | +0.109 |
aws | +0.108 |
favorite | +0.105 |
fair | +0.105 |
Neg logit contributions | |
leaf | -0.112 |
keeping | -0.107 |
outage | -0.103 |
fright | -0.101 |
fly | -0.100 |
Token | , |
Token ID | 11 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
keeping | +0.537 |
outage | +0.511 |
leaf | +0.509 |
fright | +0.501 |
harm | +0.500 |
Neg logit contributions | |
nicely | -0.126 |
fair | -0.120 |
uddle | -0.116 |
aws | -0.111 |
favorite | -0.103 |
Token | Mom |
Token ID | 11254 |
Feature activation | +8.30e-02 |
Pos logit contributions | |
fair | +0.186 |
nicely | +0.182 |
nice | +0.165 |
favorite | +0.164 |
cart | +0.161 |
Neg logit contributions | |
keeping | -0.321 |
outage | -0.289 |
harm | -0.287 |
leaf | -0.286 |
fly | -0.285 |
Token | , |
Token ID | 11 |
Feature activation | -1.47e-01 |
Pos logit contributions | |
keeping | +0.203 |
leaf | +0.198 |
fright | +0.194 |
eway | +0.193 |
growth | +0.192 |
Neg logit contributions | |
fair | -0.076 |
nicely | -0.070 |
nice | -0.063 |
uddle | -0.061 |
aws | -0.060 |
Token | we |
Token ID | 356 |
Feature activation | +3.48e-02 |
Pos logit contributions | |
fair | +0.227 |
nicely | +0.225 |
favorite | +0.210 |
uddle | +0.208 |
aws | +0.207 |
Neg logit contributions | |
keeping | -0.266 |
outage | -0.242 |
leaf | -0.239 |
fly | -0.236 |
harm | -0.236 |
Token | understand |
Token ID | 1833 |
Feature activation | +7.14e-02 |
Pos logit contributions | |
keeping | +0.068 |
outage | +0.061 |
fly | +0.060 |
eway | +0.059 |
jo | +0.058 |
Neg logit contributions | |
fair | -0.055 |
nicely | -0.054 |
nice | -0.050 |
aws | -0.049 |
favorite | -0.048 |
Token | door |
Token ID | 3420 |
Feature activation | +3.55e-02 |
Pos logit contributions | |
keeping | +0.122 |
outage | +0.113 |
harm | +0.112 |
fright | +0.112 |
leaf | +0.109 |
Neg logit contributions | |
uddle | -0.088 |
nicely | -0.086 |
fair | -0.086 |
aws | -0.085 |
favorite | -0.082 |
Token | and |
Token ID | 290 |
Feature activation | +5.07e-02 |
Pos logit contributions | |
leaf | +0.080 |
keeping | +0.080 |
icated | +0.080 |
fly | +0.078 |
fright | +0.076 |
Neg logit contributions | |
nicely | -0.035 |
fair | -0.033 |
nice | -0.029 |
cart | -0.028 |
favorite | -0.028 |
Token | smiles |
Token ID | 21845 |
Feature activation | +1.77e-02 |
Pos logit contributions | |
keeping | +0.101 |
fly | +0.097 |
ull | +0.097 |
outage | +0.096 |
leaf | +0.096 |
Neg logit contributions | |
fair | -0.067 |
nicely | -0.067 |
favorite | -0.061 |
cooler | -0.060 |
nice | -0.060 |
Token | . |
Token ID | 13 |
Feature activation | +1.99e-02 |
Pos logit contributions | |
keeping | +0.034 |
outage | +0.032 |
leaf | +0.031 |
fright | +0.031 |
fly | +0.031 |
Neg logit contributions | |
uddle | -0.024 |
nicely | -0.023 |
fair | -0.022 |
aws | -0.022 |
favorite | -0.021 |
Token | " |
Token ID | 366 |
Feature activation | -5.61e-02 |
Pos logit contributions | |
keeping | +0.045 |
outage | +0.043 |
leaf | +0.043 |
harm | +0.043 |
fright | +0.042 |
Neg logit contributions | |
nicely | -0.020 |
fair | -0.019 |
aws | -0.018 |
uddle | -0.018 |
favorite | -0.017 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
nicely | +0.077 |
fair | +0.075 |
aws | +0.071 |
uddle | +0.068 |
favorite | +0.068 |
Neg logit contributions | |
keeping | -0.108 |
outage | -0.102 |
harm | -0.099 |
leaf | -0.098 |
fright | -0.098 |
Token | , |
Token ID | 11 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
keeping | +0.508 |
outage | +0.486 |
fright | +0.481 |
leaf | +0.480 |
harm | +0.476 |
Neg logit contributions | |
nicely | -0.139 |
uddle | -0.134 |
aws | -0.131 |
fair | -0.131 |
favorite | -0.120 |
Token | you |
Token ID | 345 |
Feature activation | +7.11e-02 |
Pos logit contributions | |
fair | +0.193 |
nicely | +0.190 |
nice | +0.170 |
favorite | +0.169 |
cart | +0.159 |
Neg logit contributions | |
keeping | -0.315 |
outage | -0.285 |
harm | -0.283 |
leaf | -0.283 |
fly | -0.280 |
Token | can |
Token ID | 460 |
Feature activation | +6.66e-02 |
Pos logit contributions | |
keeping | +0.142 |
leaf | +0.127 |
outage | +0.126 |
harm | +0.124 |
fly | +0.123 |
Neg logit contributions | |
fair | -0.107 |
nicely | -0.104 |
uddle | -0.097 |
nice | -0.097 |
favorite | -0.096 |
Token | . |
Token ID | 13 |
Feature activation | +4.51e-03 |
Pos logit contributions | |
keeping | +0.118 |
leaf | +0.108 |
outage | +0.107 |
fly | +0.103 |
harm | +0.103 |
Neg logit contributions | |
nicely | -0.105 |
fair | -0.102 |
uddle | -0.098 |
aws | -0.097 |
favorite | -0.096 |
Token | Welcome |
Token ID | 19134 |
Feature activation | -4.33e-02 |
Pos logit contributions | |
keeping | +0.008 |
harm | +0.007 |
outage | +0.007 |
leaf | +0.007 |
ull | +0.007 |
Neg logit contributions | |
nicely | -0.007 |
fair | -0.007 |
favorite | -0.006 |
aws | -0.006 |
uddle | -0.006 |
Token | asks |
Token ID | 7893 |
Feature activation | +3.46e-02 |
Pos logit contributions | |
keeping | +0.085 |
outage | +0.076 |
leaf | +0.075 |
eway | +0.073 |
harm | +0.072 |
Neg logit contributions | |
fair | -0.073 |
nicely | -0.073 |
uddle | -0.068 |
favorite | -0.067 |
aws | -0.067 |
Token | . |
Token ID | 13 |
Feature activation | +2.08e-02 |
Pos logit contributions | |
keeping | +0.083 |
outage | +0.082 |
leaf | +0.081 |
icated | +0.076 |
fly | +0.076 |
Neg logit contributions | |
nicely | -0.038 |
fair | -0.032 |
nice | -0.029 |
favorite | -0.027 |
hockey | -0.027 |
Token |
|
Token ID | 198 |
Feature activation | +1.03e-02 |
Pos logit contributions | |
keeping | +0.044 |
outage | +0.041 |
leaf | +0.041 |
fly | +0.041 |
fright | +0.041 |
Neg logit contributions | |
nicely | -0.023 |
uddle | -0.023 |
fair | -0.023 |
aws | -0.022 |
favorite | -0.021 |
Token |
|
Token ID | 198 |
Feature activation | +7.42e-03 |
Pos logit contributions | |
keeping | +0.024 |
outage | +0.022 |
leaf | +0.022 |
harm | +0.022 |
fly | +0.022 |
Neg logit contributions | |
nicely | -0.011 |
fair | -0.010 |
aws | -0.010 |
uddle | -0.009 |
nice | -0.009 |
Token | " |
Token ID | 1 |
Feature activation | -6.96e-02 |
Pos logit contributions | |
keeping | +0.016 |
outage | +0.015 |
leaf | +0.015 |
fright | +0.015 |
Judy | +0.015 |
Neg logit contributions | |
nicely | -0.008 |
uddle | -0.008 |
fair | -0.008 |
aws | -0.007 |
favorite | -0.007 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.01e-01 |
Pos logit contributions | |
uddle | +0.108 |
nicely | +0.106 |
aws | +0.104 |
fair | +0.104 |
favorite | +0.100 |
Neg logit contributions | |
keeping | -0.115 |
leaf | -0.115 |
outage | -0.110 |
fly | -0.108 |
fright | -0.108 |
Token | , |
Token ID | 11 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
keeping | +0.557 |
leaf | +0.529 |
outage | +0.528 |
harm | +0.516 |
fly | +0.516 |
Neg logit contributions | |
nicely | -0.113 |
fair | -0.108 |
uddle | -0.101 |
aws | -0.095 |
favorite | -0.088 |
Token | Ben |
Token ID | 3932 |
Feature activation | +2.08e-02 |
Pos logit contributions | |
fair | +0.193 |
nicely | +0.190 |
nice | +0.171 |
favorite | +0.170 |
cart | +0.167 |
Neg logit contributions | |
keeping | -0.310 |
outage | -0.282 |
leaf | -0.278 |
harm | -0.274 |
fly | -0.272 |
Token | , |
Token ID | 11 |
Feature activation | -1.47e-01 |
Pos logit contributions | |
leaf | +0.060 |
keeping | +0.060 |
outage | +0.059 |
eway | +0.058 |
icated | +0.057 |
Neg logit contributions | |
fair | -0.011 |
, | -0.009 |
nice | -0.008 |
nicely | -0.008 |
cooler | -0.007 |
Token | your |
Token ID | 534 |
Feature activation | -4.30e-02 |
Pos logit contributions | |
fair | +0.220 |
nicely | +0.219 |
favorite | +0.199 |
nice | +0.197 |
cart | +0.194 |
Neg logit contributions | |
keeping | -0.280 |
outage | -0.252 |
leaf | -0.251 |
fly | -0.243 |
harm | -0.243 |
Token | cup |
Token ID | 6508 |
Feature activation | -2.67e-02 |
Pos logit contributions | |
fair | +0.054 |
nicely | +0.053 |
favorite | +0.052 |
uddle | +0.052 |
cart | +0.050 |
Neg logit contributions | |
keeping | -0.092 |
outage | -0.085 |
Judy | -0.084 |
leaf | -0.084 |
Maria | -0.082 |
Token | S |
Token ID | 50 |
Feature activation | +1.92e-02 |
Pos logit contributions | |
keeping | +0.042 |
outage | +0.040 |
harm | +0.040 |
leaf | +0.040 |
fly | +0.039 |
Neg logit contributions | |
nicely | -0.017 |
fair | -0.016 |
aws | -0.015 |
uddle | -0.014 |
hockey | -0.014 |
Token | ara |
Token ID | 3301 |
Feature activation | +2.31e-02 |
Pos logit contributions | |
keeping | +0.039 |
outage | +0.037 |
leaf | +0.037 |
fright | +0.036 |
harm | +0.036 |
Neg logit contributions | |
nicely | -0.023 |
fair | -0.023 |
uddle | -0.022 |
aws | -0.022 |
favorite | -0.021 |
Token | says |
Token ID | 1139 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
keeping | +0.048 |
outage | +0.046 |
leaf | +0.044 |
harm | +0.044 |
eway | +0.043 |
Neg logit contributions | |
nicely | -0.029 |
fair | -0.029 |
uddle | -0.029 |
aws | -0.028 |
favorite | -0.026 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
keeping | +0.099 |
fright | +0.096 |
Judy | +0.095 |
fly | +0.095 |
leaf | +0.094 |
Neg logit contributions | |
uddle | -0.033 |
nicely | -0.031 |
aws | -0.030 |
fair | -0.030 |
favorite | -0.028 |
Token | " |
Token ID | 366 |
Feature activation | -6.01e-02 |
Pos logit contributions | |
nicely | +0.090 |
fair | +0.085 |
uddle | +0.082 |
aws | +0.078 |
favorite | +0.073 |
Neg logit contributions | |
keeping | -0.386 |
outage | -0.368 |
leaf | -0.366 |
harm | -0.360 |
fright | -0.359 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.01e-01 |
Pos logit contributions | |
nicely | +0.078 |
fair | +0.078 |
aws | +0.074 |
sandbox | +0.069 |
uddle | +0.068 |
Neg logit contributions | |
keeping | -0.119 |
outage | -0.118 |
harm | -0.111 |
fly | -0.110 |
fright | -0.109 |
Token | , |
Token ID | 11 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
keeping | +0.494 |
fright | +0.476 |
outage | +0.474 |
Judy | +0.473 |
leaf | +0.467 |
Neg logit contributions | |
nicely | -0.147 |
uddle | -0.138 |
fair | -0.134 |
aws | -0.133 |
favorite | -0.126 |
Token | Mom |
Token ID | 11254 |
Feature activation | +8.32e-02 |
Pos logit contributions | |
fair | +0.197 |
nicely | +0.193 |
cart | +0.175 |
favorite | +0.174 |
nice | +0.173 |
Neg logit contributions | |
keeping | -0.326 |
outage | -0.295 |
harm | -0.290 |
leaf | -0.287 |
rated | -0.285 |
Token | . |
Token ID | 13 |
Feature activation | +5.93e-03 |
Pos logit contributions | |
keeping | +0.180 |
leaf | +0.175 |
outage | +0.173 |
fright | +0.171 |
eway | +0.170 |
Neg logit contributions | |
fair | -0.099 |
nicely | -0.094 |
aws | -0.087 |
nice | -0.086 |
uddle | -0.083 |
Token | I |
Token ID | 314 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
keeping | +0.011 |
harm | +0.010 |
outage | +0.010 |
leaf | +0.010 |
fright | +0.010 |
Neg logit contributions | |
nicely | -0.009 |
fair | -0.009 |
favorite | -0.008 |
uddle | -0.008 |
cart | -0.008 |
Token | am |
Token ID | 716 |
Feature activation | -4.29e-02 |
Pos logit contributions | |
keeping | +0.078 |
outage | +0.067 |
eway | +0.066 |
Judy | +0.066 |
icated | +0.066 |
Neg logit contributions | |
nicely | -0.066 |
fair | -0.066 |
favorite | -0.062 |
nice | -0.062 |
uddle | -0.061 |
Token | says |
Token ID | 1139 |
Feature activation | +3.74e-02 |
Pos logit contributions | |
keeping | +0.141 |
outage | +0.124 |
harm | +0.122 |
eway | +0.120 |
leaf | +0.118 |
Neg logit contributions | |
nicely | -0.098 |
fair | -0.096 |
aws | -0.088 |
favorite | -0.087 |
nice | -0.085 |
Token | . |
Token ID | 13 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
keeping | +0.084 |
outage | +0.078 |
leaf | +0.078 |
harm | +0.076 |
fright | +0.075 |
Neg logit contributions | |
nicely | -0.043 |
fair | -0.040 |
uddle | -0.037 |
favorite | -0.037 |
aws | -0.036 |
Token |
|
Token ID | 198 |
Feature activation | +6.54e-03 |
Pos logit contributions | |
keeping | +0.035 |
outage | +0.033 |
leaf | +0.033 |
fright | +0.033 |
fly | +0.032 |
Neg logit contributions | |
nicely | -0.016 |
fair | -0.016 |
uddle | -0.016 |
aws | -0.015 |
favorite | -0.015 |
Token |
|
Token ID | 198 |
Feature activation | +4.39e-03 |
Pos logit contributions | |
keeping | +0.015 |
outage | +0.014 |
leaf | +0.014 |
fly | +0.014 |
harm | +0.014 |
Neg logit contributions | |
nicely | -0.007 |
aws | -0.006 |
fair | -0.006 |
uddle | -0.006 |
nice | -0.006 |
Token | " |
Token ID | 1 |
Feature activation | -7.28e-02 |
Pos logit contributions | |
keeping | +0.010 |
outage | +0.009 |
leaf | +0.009 |
fright | +0.009 |
harm | +0.009 |
Neg logit contributions | |
nicely | -0.005 |
uddle | -0.004 |
fair | -0.004 |
aws | -0.004 |
favorite | -0.004 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.01e-01 |
Pos logit contributions | |
uddle | +0.125 |
nicely | +0.117 |
aws | +0.115 |
favorite | +0.113 |
cart | +0.113 |
Neg logit contributions | |
leaf | -0.115 |
keeping | -0.113 |
outage | -0.106 |
fright | -0.105 |
fly | -0.104 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.510 |
outage | +0.489 |
fright | +0.485 |
leaf | +0.482 |
Judy | +0.481 |
Neg logit contributions | |
nicely | -0.136 |
uddle | -0.129 |
fair | -0.128 |
aws | -0.125 |
favorite | -0.116 |
Token | I |
Token ID | 314 |
Feature activation | +4.13e-02 |
Pos logit contributions | |
fair | +0.186 |
nicely | +0.180 |
nice | +0.165 |
favorite | +0.160 |
cart | +0.156 |
Neg logit contributions | |
keeping | -0.343 |
outage | -0.296 |
leaf | -0.295 |
harm | -0.295 |
rated | -0.295 |
Token | can |
Token ID | 460 |
Feature activation | +6.28e-02 |
Pos logit contributions | |
keeping | +0.074 |
leaf | +0.066 |
outage | +0.065 |
fly | +0.064 |
harm | +0.063 |
Neg logit contributions | |
nicely | -0.067 |
fair | -0.066 |
uddle | -0.063 |
favorite | -0.063 |
aws | -0.062 |
Token | . |
Token ID | 13 |
Feature activation | +5.46e-04 |
Pos logit contributions | |
keeping | +0.118 |
leaf | +0.107 |
outage | +0.106 |
Judy | +0.102 |
harm | +0.101 |
Neg logit contributions | |
nicely | -0.095 |
fair | -0.091 |
uddle | -0.087 |
favorite | -0.086 |
aws | -0.086 |
Token | But |
Token ID | 887 |
Feature activation | +7.46e-02 |
Pos logit contributions | |
keeping | +0.001 |
outage | +0.001 |
harm | +0.001 |
ull | +0.001 |
leaf | +0.001 |
Neg logit contributions | |
nicely | -0.001 |
fair | -0.001 |
favorite | -0.001 |
aws | -0.001 |
uddle | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +8.28e-03 |
Pos logit contributions | |
keeping | +0.030 |
outage | +0.029 |
leaf | +0.028 |
fly | +0.028 |
harm | +0.028 |
Neg logit contributions | |
nicely | -0.013 |
aws | -0.011 |
fair | -0.011 |
uddle | -0.010 |
nice | -0.010 |
Token | They |
Token ID | 2990 |
Feature activation | -1.52e-02 |
Pos logit contributions | |
keeping | +0.019 |
outage | +0.018 |
leaf | +0.018 |
harm | +0.018 |
fly | +0.018 |
Neg logit contributions | |
nicely | -0.008 |
fair | -0.008 |
aws | -0.007 |
uddle | -0.007 |
favorite | -0.007 |
Token | said |
Token ID | 531 |
Feature activation | +3.69e-02 |
Pos logit contributions | |
nicely | +0.022 |
fair | +0.022 |
aws | +0.021 |
uddle | +0.020 |
favorite | +0.020 |
Neg logit contributions | |
keeping | -0.030 |
outage | -0.028 |
harm | -0.026 |
leaf | -0.026 |
fly | -0.026 |
Token | , |
Token ID | 11 |
Feature activation | -1.43e-01 |
Pos logit contributions | |
keeping | +0.084 |
fright | +0.082 |
Judy | +0.082 |
leaf | +0.081 |
Maria | +0.081 |
Neg logit contributions | |
uddle | -0.032 |
aws | -0.030 |
fair | -0.028 |
nicely | -0.028 |
sandbox | -0.027 |
Token | " |
Token ID | 366 |
Feature activation | -6.13e-02 |
Pos logit contributions | |
nicely | +0.097 |
uddle | +0.094 |
fair | +0.094 |
aws | +0.089 |
favorite | +0.083 |
Neg logit contributions | |
keeping | -0.367 |
outage | -0.348 |
leaf | -0.348 |
fright | -0.344 |
fly | -0.343 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.090 |
fair | +0.087 |
aws | +0.082 |
favorite | +0.080 |
uddle | +0.080 |
Neg logit contributions | |
keeping | -0.114 |
outage | -0.106 |
harm | -0.105 |
Judy | -0.102 |
fright | -0.101 |
Token | , |
Token ID | 11 |
Feature activation | -1.47e-01 |
Pos logit contributions | |
keeping | +0.528 |
outage | +0.503 |
leaf | +0.500 |
fright | +0.495 |
harm | +0.493 |
Neg logit contributions | |
nicely | -0.127 |
fair | -0.119 |
uddle | -0.117 |
aws | -0.113 |
favorite | -0.104 |
Token | mom |
Token ID | 1995 |
Feature activation | +9.12e-02 |
Pos logit contributions | |
fair | +0.189 |
nicely | +0.184 |
nice | +0.169 |
favorite | +0.163 |
cart | +0.159 |
Neg logit contributions | |
keeping | -0.336 |
harm | -0.293 |
fly | -0.289 |
rum | -0.288 |
leaf | -0.288 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.231 |
leaf | +0.224 |
fright | +0.221 |
outage | +0.218 |
harm | +0.217 |
Neg logit contributions | |
fair | -0.075 |
nicely | -0.072 |
aws | -0.063 |
nice | -0.060 |
uddle | -0.055 |
Token | we |
Token ID | 356 |
Feature activation | +3.28e-02 |
Pos logit contributions | |
fair | +0.214 |
nicely | +0.210 |
favorite | +0.192 |
nice | +0.191 |
cart | +0.186 |
Neg logit contributions | |
keeping | -0.300 |
outage | -0.265 |
fly | -0.264 |
harm | -0.263 |
leaf | -0.263 |
Token | had |
Token ID | 550 |
Feature activation | +3.21e-02 |
Pos logit contributions | |
keeping | +0.066 |
outage | +0.058 |
eway | +0.057 |
jo | +0.057 |
fly | +0.056 |
Neg logit contributions | |
fair | -0.050 |
nicely | -0.049 |
nice | -0.046 |
favorite | -0.045 |
uddle | -0.044 |
Token | . |
Token ID | 13 |
Feature activation | +1.75e-02 |
Pos logit contributions | |
nicely | +0.028 |
fair | +0.027 |
favorite | +0.024 |
nice | +0.023 |
aws | +0.023 |
Neg logit contributions | |
keeping | -0.063 |
outage | -0.061 |
leaf | -0.060 |
harm | -0.057 |
icated | -0.057 |
Token | He |
Token ID | 679 |
Feature activation | -3.98e-02 |
Pos logit contributions | |
keeping | +0.040 |
outage | +0.037 |
leaf | +0.037 |
fright | +0.036 |
fly | +0.036 |
Neg logit contributions | |
nicely | -0.017 |
fair | -0.017 |
uddle | -0.017 |
aws | -0.016 |
favorite | -0.015 |
Token | says |
Token ID | 1139 |
Feature activation | +4.25e-02 |
Pos logit contributions | |
fair | +0.064 |
nicely | +0.064 |
uddle | +0.063 |
aws | +0.062 |
favorite | +0.062 |
Neg logit contributions | |
keeping | -0.072 |
outage | -0.064 |
icated | -0.060 |
harm | -0.060 |
eway | -0.060 |
Token | , |
Token ID | 11 |
Feature activation | -1.41e-01 |
Pos logit contributions | |
keeping | +0.093 |
Judy | +0.089 |
outage | +0.088 |
fright | +0.088 |
leaf | +0.088 |
Neg logit contributions | |
uddle | -0.043 |
nicely | -0.040 |
aws | -0.040 |
fair | -0.039 |
favorite | -0.038 |
Token | " |
Token ID | 366 |
Feature activation | -6.19e-02 |
Pos logit contributions | |
uddle | +0.100 |
nicely | +0.100 |
fair | +0.098 |
aws | +0.095 |
favorite | +0.087 |
Neg logit contributions | |
keeping | -0.353 |
outage | -0.337 |
leaf | -0.335 |
fly | -0.333 |
fright | -0.332 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.079 |
fair | +0.079 |
aws | +0.074 |
hockey | +0.069 |
uddle | +0.068 |
Neg logit contributions | |
keeping | -0.125 |
outage | -0.125 |
harm | -0.116 |
fright | -0.113 |
fly | -0.113 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.535 |
outage | +0.509 |
leaf | +0.507 |
fright | +0.499 |
harm | +0.498 |
Neg logit contributions | |
nicely | -0.121 |
fair | -0.115 |
uddle | -0.111 |
aws | -0.106 |
favorite | -0.098 |
Token | please |
Token ID | 3387 |
Feature activation | +6.88e-02 |
Pos logit contributions | |
fair | +0.220 |
nicely | +0.211 |
nice | +0.198 |
cart | +0.195 |
favorite | +0.190 |
Neg logit contributions | |
keeping | -0.308 |
outage | -0.275 |
harm | -0.267 |
fly | -0.263 |
jo | -0.260 |
Token | . |
Token ID | 13 |
Feature activation | +4.67e-03 |
Pos logit contributions | |
keeping | +0.151 |
leaf | +0.143 |
outage | +0.140 |
jo | +0.134 |
ull | +0.134 |
Neg logit contributions | |
nicely | -0.087 |
fair | -0.085 |
nice | -0.077 |
uddle | -0.071 |
favorite | -0.068 |
Token | Thank |
Token ID | 6952 |
Feature activation | +3.80e-02 |
Pos logit contributions | |
keeping | +0.009 |
harm | +0.008 |
outage | +0.008 |
leaf | +0.008 |
fright | +0.007 |
Neg logit contributions | |
nicely | -0.007 |
fair | -0.007 |
favorite | -0.007 |
hockey | -0.006 |
aws | -0.006 |
Token | you |
Token ID | 345 |
Feature activation | +6.81e-02 |
Pos logit contributions | |
outage | +0.066 |
keeping | +0.066 |
leaf | +0.060 |
fly | +0.060 |
harm | +0.059 |
Neg logit contributions | |
nicely | -0.062 |
fair | -0.060 |
aws | -0.056 |
nice | -0.055 |
uddle | -0.054 |
Token | Tom |
Token ID | 13787 |
Feature activation | +6.07e-02 |
Pos logit contributions | |
keeping | +0.031 |
outage | +0.030 |
harm | +0.029 |
leaf | +0.029 |
fly | +0.029 |
Neg logit contributions | |
nicely | -0.012 |
fair | -0.011 |
aws | -0.010 |
uddle | -0.010 |
hockey | -0.009 |
Token | my |
Token ID | 1820 |
Feature activation | +7.25e-02 |
Pos logit contributions | |
keeping | +0.118 |
outage | +0.109 |
leaf | +0.104 |
harm | +0.102 |
eway | +0.101 |
Neg logit contributions | |
nicely | -0.090 |
fair | -0.089 |
uddle | -0.086 |
aws | -0.086 |
favorite | -0.082 |
Token | said |
Token ID | 531 |
Feature activation | +3.60e-02 |
Pos logit contributions | |
keeping | +0.137 |
outage | +0.124 |
leaf | +0.120 |
harm | +0.117 |
eway | +0.115 |
Neg logit contributions | |
nicely | -0.112 |
fair | -0.110 |
uddle | -0.107 |
favorite | -0.104 |
aws | -0.104 |
Token | , |
Token ID | 11 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
fright | +0.073 |
ull | +0.073 |
fly | +0.072 |
Maria | +0.072 |
Judy | +0.071 |
Neg logit contributions | |
aws | -0.040 |
uddle | -0.040 |
sandbox | -0.037 |
gym | -0.035 |
udd | -0.035 |
Token | " |
Token ID | 366 |
Feature activation | -6.07e-02 |
Pos logit contributions | |
uddle | +0.104 |
nicely | +0.102 |
fair | +0.100 |
aws | +0.099 |
favorite | +0.089 |
Neg logit contributions | |
keeping | -0.365 |
outage | -0.347 |
leaf | -0.346 |
fly | -0.345 |
fright | -0.344 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.086 |
fair | +0.083 |
favorite | +0.077 |
aws | +0.077 |
uddle | +0.077 |
Neg logit contributions | |
keeping | -0.117 |
outage | -0.108 |
harm | -0.106 |
Judy | -0.105 |
fright | -0.105 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.481 |
fright | +0.465 |
outage | +0.464 |
Judy | +0.462 |
Maria | +0.459 |
Neg logit contributions | |
nicely | -0.148 |
uddle | -0.146 |
aws | -0.145 |
fair | -0.136 |
favorite | -0.130 |
Token | I |
Token ID | 314 |
Feature activation | +4.17e-02 |
Pos logit contributions | |
fair | +0.194 |
nicely | +0.190 |
nice | +0.171 |
favorite | +0.171 |
cart | +0.164 |
Neg logit contributions | |
keeping | -0.321 |
leaf | -0.289 |
harm | -0.288 |
outage | -0.287 |
fly | -0.283 |
Token | can |
Token ID | 460 |
Feature activation | +6.42e-02 |
Pos logit contributions | |
keeping | +0.076 |
leaf | +0.066 |
outage | +0.066 |
Judy | +0.065 |
fly | +0.065 |
Neg logit contributions | |
nicely | -0.068 |
fair | -0.066 |
uddle | -0.063 |
favorite | -0.063 |
nice | -0.061 |
Token | say |
Token ID | 910 |
Feature activation | +6.02e-02 |
Pos logit contributions | |
keeping | +0.103 |
outage | +0.096 |
leaf | +0.095 |
fright | +0.093 |
harm | +0.092 |
Neg logit contributions | |
nicely | -0.106 |
fair | -0.104 |
uddle | -0.104 |
aws | -0.103 |
favorite | -0.099 |
Token | suggest |
Token ID | 1950 |
Feature activation | -5.42e-02 |
Pos logit contributions | |
keeping | +0.109 |
outage | +0.102 |
leaf | +0.101 |
fright | +0.098 |
harm | +0.098 |
Neg logit contributions | |
nicely | -0.088 |
fair | -0.086 |
uddle | -0.085 |
aws | -0.083 |
favorite | -0.081 |
Token | . |
Token ID | 13 |
Feature activation | +1.25e-02 |
Pos logit contributions | |
keeping | +0.016 |
outage | +0.016 |
leaf | +0.015 |
fright | +0.015 |
Judy | +0.015 |
Neg logit contributions | |
nicely | -0.009 |
fair | -0.009 |
aws | -0.008 |
uddle | -0.008 |
favorite | -0.008 |
Token | They |
Token ID | 1119 |
Feature activation | -2.20e-02 |
Pos logit contributions | |
keeping | +0.028 |
outage | +0.026 |
leaf | +0.026 |
harm | +0.026 |
fright | +0.025 |
Neg logit contributions | |
nicely | -0.014 |
fair | -0.013 |
aws | -0.013 |
uddle | -0.012 |
favorite | -0.012 |
Token | say |
Token ID | 910 |
Feature activation | +7.36e-02 |
Pos logit contributions | |
nicely | +0.032 |
fair | +0.032 |
nice | +0.029 |
sandbox | +0.028 |
favorite | +0.028 |
Neg logit contributions | |
keeping | -0.044 |
outage | -0.041 |
eway | -0.039 |
rum | -0.038 |
jo | -0.038 |
Token | , |
Token ID | 11 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
keeping | +0.150 |
leaf | +0.142 |
fright | +0.141 |
outage | +0.140 |
Judy | +0.140 |
Neg logit contributions | |
uddle | -0.086 |
nicely | -0.083 |
aws | -0.081 |
fair | -0.080 |
favorite | -0.077 |
Token | " |
Token ID | 366 |
Feature activation | -6.08e-02 |
Pos logit contributions | |
nicely | +0.094 |
fair | +0.089 |
uddle | +0.087 |
aws | +0.083 |
favorite | +0.078 |
Neg logit contributions | |
keeping | -0.378 |
outage | -0.359 |
leaf | -0.358 |
fright | -0.353 |
harm | -0.351 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
fair | +0.081 |
nicely | +0.081 |
aws | +0.078 |
hockey | +0.071 |
uddle | +0.070 |
Neg logit contributions | |
outage | -0.121 |
keeping | -0.119 |
Maria | -0.111 |
fly | -0.111 |
harm | -0.111 |
Token | , |
Token ID | 11 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
keeping | +0.489 |
fright | +0.472 |
outage | +0.469 |
Judy | +0.466 |
leaf | +0.462 |
Neg logit contributions | |
nicely | -0.143 |
uddle | -0.137 |
aws | -0.134 |
fair | -0.130 |
favorite | -0.124 |
Token | Mom |
Token ID | 11254 |
Feature activation | +8.53e-02 |
Pos logit contributions | |
fair | +0.196 |
nicely | +0.195 |
cart | +0.175 |
favorite | +0.174 |
nice | +0.174 |
Neg logit contributions | |
keeping | -0.320 |
outage | -0.290 |
harm | -0.284 |
leaf | -0.283 |
fly | -0.281 |
Token | , |
Token ID | 11 |
Feature activation | -1.50e-01 |
Pos logit contributions | |
keeping | +0.208 |
leaf | +0.204 |
outage | +0.200 |
fright | +0.199 |
harm | +0.196 |
Neg logit contributions | |
fair | -0.077 |
nicely | -0.075 |
aws | -0.066 |
nice | -0.064 |
uddle | -0.063 |
Token | we |
Token ID | 356 |
Feature activation | +3.19e-02 |
Pos logit contributions | |
nicely | +0.232 |
fair | +0.231 |
uddle | +0.217 |
favorite | +0.215 |
aws | +0.215 |
Neg logit contributions | |
keeping | -0.271 |
outage | -0.248 |
leaf | -0.243 |
harm | -0.239 |
fly | -0.238 |
Token | will |
Token ID | 481 |
Feature activation | +6.25e-02 |
Pos logit contributions | |
keeping | +0.063 |
outage | +0.058 |
jo | +0.056 |
eway | +0.055 |
fly | +0.055 |
Neg logit contributions | |
fair | -0.049 |
nicely | -0.048 |
nice | -0.045 |
aws | -0.043 |
favorite | -0.043 |
Token | ?" |
Token ID | 1701 |
Feature activation | +1.36e-01 |
Pos logit contributions | |
fly | +0.141 |
keeping | +0.139 |
growth | +0.136 |
ull | +0.134 |
leaf | +0.134 |
Neg logit contributions | |
fair | -0.033 |
nicely | -0.030 |
nice | -0.029 |
favorite | -0.028 |
uddle | -0.025 |
Token | Lily |
Token ID | 20037 |
Feature activation | +5.93e-02 |
Pos logit contributions | |
keeping | +0.311 |
outage | +0.301 |
leaf | +0.291 |
harm | +0.284 |
fly | +0.281 |
Neg logit contributions | |
nicely | -0.151 |
fair | -0.139 |
aws | -0.131 |
favorite | -0.127 |
uddle | -0.125 |
Token | said |
Token ID | 531 |
Feature activation | +3.72e-02 |
Pos logit contributions | |
keeping | +0.117 |
outage | +0.105 |
leaf | +0.105 |
eway | +0.102 |
fly | +0.098 |
Neg logit contributions | |
nicely | -0.089 |
fair | -0.088 |
uddle | -0.081 |
nice | -0.080 |
favorite | -0.080 |
Token | , |
Token ID | 11 |
Feature activation | -1.41e-01 |
Pos logit contributions | |
keeping | +0.073 |
outage | +0.069 |
fright | +0.068 |
leaf | +0.068 |
fly | +0.068 |
Neg logit contributions | |
uddle | -0.046 |
aws | -0.045 |
fair | -0.044 |
nicely | -0.044 |
favorite | -0.041 |
Token | " |
Token ID | 366 |
Feature activation | -6.22e-02 |
Pos logit contributions | |
nicely | +0.159 |
fair | +0.154 |
uddle | +0.147 |
aws | +0.145 |
favorite | +0.141 |
Neg logit contributions | |
keeping | -0.304 |
outage | -0.287 |
leaf | -0.284 |
harm | -0.277 |
fright | -0.276 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.082 |
fair | +0.078 |
uddle | +0.075 |
aws | +0.074 |
favorite | +0.073 |
Neg logit contributions | |
keeping | -0.126 |
outage | -0.116 |
leaf | -0.115 |
harm | -0.114 |
fright | -0.113 |
Token | , |
Token ID | 11 |
Feature activation | -1.47e-01 |
Pos logit contributions | |
keeping | +0.523 |
outage | +0.499 |
leaf | +0.495 |
fright | +0.490 |
harm | +0.488 |
Neg logit contributions | |
nicely | -0.130 |
fair | -0.123 |
uddle | -0.121 |
aws | -0.116 |
favorite | -0.107 |
Token | I |
Token ID | 314 |
Feature activation | +4.10e-02 |
Pos logit contributions | |
fair | +0.191 |
nicely | +0.188 |
nice | +0.171 |
favorite | +0.170 |
cart | +0.161 |
Neg logit contributions | |
keeping | -0.328 |
harm | -0.288 |
leaf | -0.287 |
outage | -0.283 |
fly | -0.280 |
Token | am |
Token ID | 716 |
Feature activation | -4.07e-02 |
Pos logit contributions | |
keeping | +0.076 |
leaf | +0.066 |
outage | +0.065 |
Judy | +0.065 |
fly | +0.064 |
Neg logit contributions | |
nicely | -0.066 |
fair | -0.064 |
uddle | -0.062 |
favorite | -0.061 |
nice | -0.059 |
Token | ready |
Token ID | 3492 |
Feature activation | -3.07e-02 |
Pos logit contributions | |
uddle | +0.047 |
nicely | +0.045 |
aws | +0.045 |
fair | +0.042 |
cart | +0.041 |
Neg logit contributions | |
keeping | -0.085 |
leaf | -0.081 |
outage | -0.080 |
fright | -0.079 |
Judy | -0.079 |
Token | ." |
Token ID | 526 |
Feature activation | +5.75e-02 |
Pos logit contributions | |
nicely | +0.033 |
fair | +0.032 |
favorite | +0.030 |
nice | +0.029 |
uddle | +0.026 |
Neg logit contributions | |
keeping | -0.070 |
leaf | -0.068 |
outage | -0.067 |
fly | -0.066 |
fright | -0.065 |
Token | and |
Token ID | 290 |
Feature activation | +4.54e-02 |
Pos logit contributions | |
keeping | +0.111 |
outage | +0.103 |
leaf | +0.099 |
harm | +0.097 |
fright | +0.095 |
Neg logit contributions | |
nicely | -0.091 |
fair | -0.090 |
uddle | -0.088 |
aws | -0.087 |
favorite | -0.084 |
Token | Tom |
Token ID | 4186 |
Feature activation | +4.42e-02 |
Pos logit contributions | |
keeping | +0.099 |
outage | +0.097 |
leaf | +0.093 |
eway | +0.092 |
rated | +0.089 |
Neg logit contributions | |
fair | -0.053 |
nicely | -0.053 |
aws | -0.047 |
favorite | -0.046 |
hockey | -0.045 |
Token | said |
Token ID | 531 |
Feature activation | +3.34e-02 |
Pos logit contributions | |
keeping | +0.075 |
outage | +0.069 |
leaf | +0.069 |
fright | +0.067 |
harm | +0.067 |
Neg logit contributions | |
nicely | -0.069 |
fair | -0.068 |
uddle | -0.067 |
aws | -0.066 |
favorite | -0.064 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
fright | +0.071 |
keeping | +0.070 |
ull | +0.070 |
fly | +0.069 |
Judy | +0.069 |
Neg logit contributions | |
uddle | -0.033 |
aws | -0.032 |
sandbox | -0.030 |
gym | -0.030 |
udd | -0.029 |
Token | " |
Token ID | 366 |
Feature activation | -6.41e-02 |
Pos logit contributions | |
uddle | +0.104 |
nicely | +0.103 |
fair | +0.100 |
aws | +0.099 |
favorite | +0.090 |
Neg logit contributions | |
keeping | -0.366 |
outage | -0.348 |
leaf | -0.347 |
fly | -0.346 |
fright | -0.345 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.097 |
fair | +0.094 |
favorite | +0.088 |
aws | +0.087 |
uddle | +0.085 |
Neg logit contributions | |
keeping | -0.118 |
outage | -0.107 |
Judy | -0.107 |
harm | -0.106 |
tal | -0.106 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.498 |
outage | +0.479 |
fright | +0.478 |
Judy | +0.472 |
leaf | +0.470 |
Neg logit contributions | |
nicely | -0.139 |
uddle | -0.133 |
aws | -0.130 |
fair | -0.127 |
favorite | -0.118 |
Token | mom |
Token ID | 1995 |
Feature activation | +9.10e-02 |
Pos logit contributions | |
fair | +0.197 |
nicely | +0.193 |
nice | +0.175 |
favorite | +0.174 |
cart | +0.166 |
Neg logit contributions | |
keeping | -0.319 |
leaf | -0.285 |
harm | -0.285 |
outage | -0.283 |
fly | -0.281 |
Token | , |
Token ID | 11 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
keeping | +0.214 |
leaf | +0.208 |
fright | +0.206 |
outage | +0.202 |
tal | +0.199 |
Neg logit contributions | |
fair | -0.092 |
nicely | -0.089 |
aws | -0.079 |
nice | -0.077 |
uddle | -0.073 |
Token | we |
Token ID | 356 |
Feature activation | +3.31e-02 |
Pos logit contributions | |
fair | +0.218 |
nicely | +0.216 |
favorite | +0.201 |
nice | +0.195 |
cart | +0.191 |
Neg logit contributions | |
keeping | -0.294 |
outage | -0.262 |
leaf | -0.262 |
harm | -0.259 |
fly | -0.258 |
Token | understand |
Token ID | 1833 |
Feature activation | +7.13e-02 |
Pos logit contributions | |
keeping | +0.065 |
outage | +0.059 |
eway | +0.057 |
fly | +0.057 |
jo | +0.056 |
Neg logit contributions | |
fair | -0.051 |
nicely | -0.051 |
nice | -0.047 |
favorite | -0.046 |
aws | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | +1.31e-02 |
Pos logit contributions | |
keeping | +0.126 |
outage | +0.120 |
fly | +0.118 |
fright | +0.116 |
harm | +0.116 |
Neg logit contributions | |
uddle | -0.089 |
nicely | -0.088 |
fair | -0.085 |
aws | -0.085 |
favorite | -0.083 |
Token | She |
Token ID | 1375 |
Feature activation | -2.91e-02 |
Pos logit contributions | |
keeping | +0.029 |
outage | +0.027 |
leaf | +0.027 |
fright | +0.026 |
harm | +0.026 |
Neg logit contributions | |
nicely | -0.014 |
fair | -0.014 |
uddle | -0.014 |
aws | -0.013 |
favorite | -0.013 |
Token | smiles |
Token ID | 21845 |
Feature activation | +1.00e-02 |
Pos logit contributions | |
nicely | +0.050 |
fair | +0.050 |
uddle | +0.049 |
aws | +0.048 |
favorite | +0.047 |
Neg logit contributions | |
keeping | -0.046 |
outage | -0.042 |
leaf | -0.041 |
harm | -0.040 |
fright | -0.040 |
Token | . |
Token ID | 13 |
Feature activation | +1.46e-02 |
Pos logit contributions | |
keeping | +0.015 |
fly | +0.015 |
fright | +0.014 |
harm | +0.014 |
Judy | +0.014 |
Neg logit contributions | |
uddle | -0.018 |
nicely | -0.016 |
sandbox | -0.016 |
gym | -0.015 |
cart | -0.015 |
Token | " |
Token ID | 366 |
Feature activation | -6.20e-02 |
Pos logit contributions | |
keeping | +0.031 |
outage | +0.029 |
leaf | +0.029 |
Judy | +0.029 |
fright | +0.028 |
Neg logit contributions | |
nicely | -0.016 |
uddle | -0.016 |
fair | -0.016 |
aws | -0.015 |
favorite | -0.015 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.092 |
fair | +0.090 |
uddle | +0.089 |
aws | +0.087 |
favorite | +0.085 |
Neg logit contributions | |
keeping | -0.111 |
outage | -0.103 |
leaf | -0.103 |
fright | -0.100 |
harm | -0.100 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
keeping | +0.486 |
outage | +0.471 |
fright | +0.470 |
Judy | +0.465 |
harm | +0.463 |
Neg logit contributions | |
nicely | -0.144 |
uddle | -0.140 |
aws | -0.136 |
fair | -0.133 |
favorite | -0.126 |
Token | I |
Token ID | 314 |
Feature activation | +4.07e-02 |
Pos logit contributions | |
fair | +0.191 |
nicely | +0.188 |
favorite | +0.170 |
nice | +0.168 |
cart | +0.165 |
Neg logit contributions | |
keeping | -0.324 |
outage | -0.287 |
leaf | -0.283 |
harm | -0.282 |
fly | -0.280 |
Token | got |
Token ID | 1392 |
Feature activation | -1.16e-01 |
Pos logit contributions | |
keeping | +0.074 |
outage | +0.067 |
leaf | +0.066 |
fly | +0.064 |
Judy | +0.064 |
Neg logit contributions | |
nicely | -0.064 |
fair | -0.063 |
uddle | -0.061 |
favorite | -0.060 |
aws | -0.059 |
Token | it |
Token ID | 340 |
Feature activation | -6.09e-02 |
Pos logit contributions | |
fair | +0.155 |
nicely | +0.153 |
favorite | +0.140 |
nice | +0.139 |
aws | +0.138 |
Neg logit contributions | |
keeping | -0.239 |
outage | -0.238 |
jo | -0.222 |
fly | -0.220 |
eway | -0.219 |
Token | ! |
Token ID | 0 |
Feature activation | +2.36e-02 |
Pos logit contributions | |
nicely | +0.092 |
fair | +0.091 |
uddle | +0.087 |
aws | +0.087 |
favorite | +0.085 |
Neg logit contributions | |
keeping | -0.108 |
outage | -0.101 |
leaf | -0.101 |
fright | -0.097 |
harm | -0.097 |
Token |
|
Token ID | 198 |
Feature activation | +2.64e-02 |
Pos logit contributions | |
keeping | +0.064 |
outage | +0.060 |
leaf | +0.059 |
fright | +0.059 |
harm | +0.058 |
Neg logit contributions | |
nicely | -0.035 |
fair | -0.034 |
uddle | -0.033 |
aws | -0.032 |
favorite | -0.031 |
Token |
|
Token ID | 198 |
Feature activation | +1.77e-02 |
Pos logit contributions | |
keeping | +0.063 |
outage | +0.062 |
leaf | +0.061 |
harm | +0.060 |
rated | +0.059 |
Neg logit contributions | |
nicely | -0.027 |
fair | -0.023 |
aws | -0.022 |
hockey | -0.021 |
nice | -0.020 |
Token | They |
Token ID | 2990 |
Feature activation | -1.52e-02 |
Pos logit contributions | |
keeping | +0.042 |
outage | +0.040 |
harm | +0.040 |
leaf | +0.040 |
fly | +0.039 |
Neg logit contributions | |
nicely | -0.017 |
fair | -0.016 |
aws | -0.015 |
uddle | -0.014 |
hockey | -0.014 |
Token | said |
Token ID | 531 |
Feature activation | +3.88e-02 |
Pos logit contributions | |
nicely | +0.019 |
fair | +0.019 |
aws | +0.018 |
favorite | +0.017 |
cart | +0.016 |
Neg logit contributions | |
keeping | -0.033 |
outage | -0.032 |
leaf | -0.029 |
rum | -0.029 |
harm | -0.029 |
Token | " |
Token ID | 366 |
Feature activation | -5.94e-02 |
Pos logit contributions | |
keeping | +0.084 |
fright | +0.079 |
Judy | +0.079 |
leaf | +0.079 |
outage | +0.079 |
Neg logit contributions | |
uddle | -0.041 |
aws | -0.039 |
nicely | -0.038 |
fair | -0.037 |
sandbox | -0.035 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.079 |
fair | +0.078 |
aws | +0.074 |
hockey | +0.068 |
favorite | +0.066 |
Neg logit contributions | |
outage | -0.118 |
keeping | -0.112 |
tal | -0.111 |
rum | -0.107 |
Maria | -0.107 |
Token | ! |
Token ID | 0 |
Feature activation | +2.57e-02 |
Pos logit contributions | |
keeping | +0.523 |
leaf | +0.491 |
outage | +0.488 |
fly | +0.477 |
harm | +0.477 |
Neg logit contributions | |
nicely | -0.160 |
fair | -0.160 |
uddle | -0.141 |
aws | -0.134 |
favorite | -0.132 |
Token | You |
Token ID | 921 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
keeping | +0.044 |
harm | +0.044 |
outage | +0.041 |
fright | +0.040 |
leaf | +0.040 |
Neg logit contributions | |
nicely | -0.044 |
fair | -0.039 |
favorite | -0.038 |
aws | -0.038 |
hockey | -0.037 |
Token | found |
Token ID | 1043 |
Feature activation | -6.40e-02 |
Pos logit contributions | |
keeping | +0.248 |
eway | +0.213 |
rum | +0.211 |
jo | +0.209 |
harm | +0.209 |
Neg logit contributions | |
fair | -0.157 |
nice | -0.141 |
nicely | -0.138 |
favorite | -0.138 |
best | -0.131 |
Token | a |
Token ID | 257 |
Feature activation | +3.17e-02 |
Pos logit contributions | |
fair | +0.095 |
nicely | +0.092 |
favorite | +0.087 |
aws | +0.087 |
nice | +0.084 |
Neg logit contributions | |
outage | -0.119 |
keeping | -0.118 |
leaf | -0.113 |
fly | -0.109 |
jo | -0.108 |
Token | dress |
Token ID | 6576 |
Feature activation | -2.94e-02 |
Pos logit contributions | |
keeping | +0.069 |
outage | +0.065 |
leaf | +0.064 |
fright | +0.063 |
harm | +0.063 |
Neg logit contributions | |
nicely | -0.034 |
fair | -0.033 |
uddle | -0.033 |
aws | -0.032 |
favorite | -0.031 |
Token |
|
Token ID | 198 |
Feature activation | +2.63e-03 |
Pos logit contributions | |
keeping | +0.011 |
rated | +0.011 |
outage | +0.011 |
leaf | +0.011 |
fly | +0.011 |
Neg logit contributions | |
nicely | -0.004 |
fair | -0.003 |
nice | -0.003 |
aws | -0.003 |
hockey | -0.003 |
Token | Mom |
Token ID | 29252 |
Feature activation | +9.49e-02 |
Pos logit contributions | |
keeping | +0.006 |
outage | +0.006 |
harm | +0.006 |
leaf | +0.006 |
rated | +0.006 |
Neg logit contributions | |
nicely | -0.003 |
fair | -0.002 |
aws | -0.002 |
uddle | -0.002 |
nice | -0.002 |
Token | says |
Token ID | 1139 |
Feature activation | +3.81e-02 |
Pos logit contributions | |
keeping | +0.172 |
outage | +0.149 |
harm | +0.147 |
eway | +0.143 |
leaf | +0.141 |
Neg logit contributions | |
nicely | -0.154 |
fair | -0.153 |
uddle | -0.151 |
aws | -0.145 |
cart | -0.141 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
keeping | +0.087 |
leaf | +0.084 |
fright | +0.084 |
Judy | +0.083 |
outage | +0.083 |
Neg logit contributions | |
uddle | -0.034 |
aws | -0.033 |
fair | -0.032 |
favorite | -0.031 |
nicely | -0.031 |
Token | " |
Token ID | 366 |
Feature activation | -6.18e-02 |
Pos logit contributions | |
uddle | +0.108 |
nicely | +0.105 |
aws | +0.103 |
fair | +0.102 |
favorite | +0.095 |
Neg logit contributions | |
keeping | -0.365 |
outage | -0.347 |
leaf | -0.347 |
fright | -0.346 |
fly | -0.344 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.088 |
fair | +0.087 |
uddle | +0.083 |
aws | +0.083 |
favorite | +0.080 |
Neg logit contributions | |
keeping | -0.117 |
outage | -0.107 |
harm | -0.105 |
leaf | -0.105 |
fly | -0.104 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.531 |
outage | +0.507 |
leaf | +0.503 |
fright | +0.498 |
harm | +0.496 |
Neg logit contributions | |
nicely | -0.122 |
fair | -0.115 |
uddle | -0.112 |
aws | -0.108 |
favorite | -0.100 |
Token | I |
Token ID | 314 |
Feature activation | +4.24e-02 |
Pos logit contributions | |
fair | +0.197 |
nicely | +0.186 |
nice | +0.175 |
cart | +0.171 |
favorite | +0.171 |
Neg logit contributions | |
keeping | -0.325 |
fly | -0.287 |
leaf | -0.284 |
harm | -0.283 |
eway | -0.283 |
Token | am |
Token ID | 716 |
Feature activation | -4.18e-02 |
Pos logit contributions | |
keeping | +0.073 |
leaf | +0.062 |
fly | +0.061 |
icated | +0.060 |
eway | +0.059 |
Neg logit contributions | |
nicely | -0.076 |
fair | -0.073 |
uddle | -0.072 |
favorite | -0.072 |
nice | -0.070 |
Token | okay |
Token ID | 8788 |
Feature activation | +6.84e-02 |
Pos logit contributions | |
uddle | +0.052 |
aws | +0.051 |
nicely | +0.048 |
favorite | +0.047 |
sandbox | +0.047 |
Neg logit contributions | |
keeping | -0.081 |
outage | -0.079 |
fright | -0.078 |
Judy | -0.078 |
leaf | -0.077 |
Token | . |
Token ID | 13 |
Feature activation | +4.55e-03 |
Pos logit contributions | |
keeping | +0.155 |
leaf | +0.151 |
outage | +0.146 |
osh | +0.141 |
fly | +0.141 |
Neg logit contributions | |
nicely | -0.073 |
fair | -0.070 |
favorite | -0.062 |
uddle | -0.062 |
nice | -0.061 |
Token | pretty |
Token ID | 2495 |
Feature activation | -4.86e-03 |
Pos logit contributions | |
keeping | +0.118 |
outage | +0.111 |
rum | +0.108 |
leaf | +0.106 |
fly | +0.106 |
Neg logit contributions | |
fair | -0.042 |
nice | -0.038 |
nicely | -0.037 |
favorite | -0.035 |
cooler | -0.033 |
Token | !" |
Token ID | 2474 |
Feature activation | +6.75e-02 |
Pos logit contributions | |
fair | +0.006 |
nicely | +0.005 |
aws | +0.005 |
uddle | +0.005 |
favorite | +0.005 |
Neg logit contributions | |
keeping | -0.010 |
leaf | -0.010 |
outage | -0.010 |
Judy | -0.010 |
fright | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | +5.08e-03 |
Pos logit contributions | |
keeping | +0.159 |
outage | +0.158 |
harm | +0.152 |
rated | +0.150 |
leaf | +0.150 |
Neg logit contributions | |
nicely | -0.070 |
fair | -0.062 |
aws | -0.061 |
cart | -0.057 |
hockey | -0.056 |
Token |
|
Token ID | 198 |
Feature activation | +2.96e-03 |
Pos logit contributions | |
keeping | +0.013 |
outage | +0.013 |
rated | +0.013 |
fly | +0.013 |
leaf | +0.012 |
Neg logit contributions | |
nicely | -0.004 |
aws | -0.004 |
fair | -0.003 |
nice | -0.003 |
hockey | -0.003 |
Token | " |
Token ID | 1 |
Feature activation | -7.58e-02 |
Pos logit contributions | |
keeping | +0.007 |
outage | +0.007 |
harm | +0.007 |
fly | +0.007 |
leaf | +0.006 |
Neg logit contributions | |
nicely | -0.003 |
fair | -0.003 |
aws | -0.003 |
uddle | -0.002 |
hockey | -0.002 |
Token | Yes |
Token ID | 5297 |
Feature activation | +2.00e-01 |
Pos logit contributions | |
nicely | +0.117 |
fair | +0.114 |
uddle | +0.114 |
aws | +0.111 |
favorite | +0.108 |
Neg logit contributions | |
keeping | -0.131 |
leaf | -0.121 |
outage | -0.121 |
fright | -0.118 |
fly | -0.117 |
Token | , |
Token ID | 11 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
keeping | +0.528 |
outage | +0.502 |
leaf | +0.500 |
fright | +0.492 |
harm | +0.491 |
Neg logit contributions | |
nicely | -0.129 |
fair | -0.122 |
uddle | -0.118 |
aws | -0.113 |
favorite | -0.106 |
Token | it |
Token ID | 340 |
Feature activation | -5.67e-02 |
Pos logit contributions | |
fair | +0.189 |
nicely | +0.176 |
nice | +0.168 |
cart | +0.165 |
favorite | +0.158 |
Neg logit contributions | |
keeping | -0.336 |
harm | -0.311 |
outage | -0.307 |
rated | -0.306 |
rum | -0.300 |
Token | is |
Token ID | 318 |
Feature activation | -4.23e-03 |
Pos logit contributions | |
fair | +0.073 |
uddle | +0.068 |
nice | +0.068 |
nicely | +0.066 |
best | +0.064 |
Neg logit contributions | |
keeping | -0.127 |
leaf | -0.115 |
outage | -0.110 |
harm | -0.110 |
icated | -0.109 |
Token | !" |
Token ID | 2474 |
Feature activation | +6.32e-02 |
Pos logit contributions | |
fair | +0.007 |
nicely | +0.007 |
nice | +0.007 |
favorite | +0.006 |
best | +0.006 |
Neg logit contributions | |
keeping | -0.008 |
leaf | -0.007 |
outage | -0.007 |
osh | -0.007 |
fright | -0.007 |
Token | Ben |
Token ID | 3932 |
Feature activation | +1.99e-02 |
Pos logit contributions | |
outage | +0.149 |
rated | +0.145 |
harm | +0.143 |
keeping | +0.142 |
leaf | +0.136 |
Neg logit contributions | |
nicely | -0.066 |
fair | -0.064 |
cart | -0.060 |
aws | -0.059 |
hockey | -0.055 |
Token | their |
Token ID | 511 |
Feature activation | -6.46e-02 |
Pos logit contributions | |
fair | +0.136 |
nicely | +0.125 |
aws | +0.115 |
nice | +0.112 |
cart | +0.107 |
Neg logit contributions | |
keeping | -0.381 |
outage | -0.357 |
fly | -0.350 |
harm | -0.348 |
leaf | -0.339 |
Token | clothes |
Token ID | 8242 |
Feature activation | +1.09e-02 |
Pos logit contributions | |
fair | +0.063 |
nicely | +0.062 |
favorite | +0.062 |
cart | +0.056 |
uddle | +0.055 |
Neg logit contributions | |
keeping | -0.151 |
outage | -0.149 |
leaf | -0.148 |
Judy | -0.144 |
fly | -0.143 |
Token | , |
Token ID | 11 |
Feature activation | -1.44e-01 |
Pos logit contributions | |
keeping | +0.023 |
outage | +0.021 |
leaf | +0.021 |
fright | +0.021 |
harm | +0.021 |
Neg logit contributions | |
nicely | -0.013 |
fair | -0.012 |
uddle | -0.012 |
aws | -0.012 |
favorite | -0.012 |
Token | their |
Token ID | 511 |
Feature activation | -7.05e-02 |
Pos logit contributions | |
nicely | +0.156 |
fair | +0.154 |
favorite | +0.139 |
cart | +0.135 |
nice | +0.135 |
Neg logit contributions | |
keeping | -0.327 |
leaf | -0.311 |
outage | -0.309 |
harm | -0.303 |
fly | -0.299 |
Token | tooth |
Token ID | 16162 |
Feature activation | -6.54e-03 |
Pos logit contributions | |
nicely | +0.076 |
fair | +0.075 |
favorite | +0.072 |
uddle | +0.071 |
aws | +0.069 |
Neg logit contributions | |
keeping | -0.158 |
outage | -0.152 |
leaf | -0.150 |
Judy | -0.148 |
fly | -0.146 |
Token | br |
Token ID | 1671 |
Feature activation | -1.82e-01 |
Pos logit contributions | |
nicely | +0.008 |
uddle | +0.008 |
fair | +0.008 |
aws | +0.008 |
favorite | +0.007 |
Neg logit contributions | |
keeping | -0.013 |
leaf | -0.013 |
fright | -0.012 |
outage | -0.012 |
Judy | -0.012 |
Token | ushes |
Token ID | 17237 |
Feature activation | +5.23e-02 |
Pos logit contributions | |
nicely | +0.298 |
fair | +0.294 |
uddle | +0.290 |
aws | +0.284 |
favorite | +0.277 |
Neg logit contributions | |
keeping | -0.296 |
outage | -0.273 |
leaf | -0.269 |
fly | -0.263 |
fright | -0.262 |
Token | and |
Token ID | 290 |
Feature activation | +3.95e-02 |
Pos logit contributions | |
keeping | +0.123 |
leaf | +0.121 |
outage | +0.119 |
fright | +0.117 |
harm | +0.115 |
Neg logit contributions | |
nicely | -0.051 |
fair | -0.048 |
aws | -0.043 |
favorite | -0.041 |
nice | -0.040 |
Token | their |
Token ID | 511 |
Feature activation | -7.56e-02 |
Pos logit contributions | |
keeping | +0.074 |
outage | +0.073 |
leaf | +0.071 |
eway | +0.069 |
ull | +0.069 |
Neg logit contributions | |
fair | -0.056 |
nicely | -0.055 |
favorite | -0.051 |
nice | -0.050 |
cooler | -0.049 |
Token | cozy |
Token ID | 37438 |
Feature activation | -4.56e-02 |
Pos logit contributions | |
favorite | +0.099 |
fair | +0.098 |
nicely | +0.098 |
uddle | +0.090 |
cart | +0.089 |
Neg logit contributions | |
outage | -0.154 |
keeping | -0.151 |
leaf | -0.149 |
Judy | -0.145 |
eway | -0.144 |
Token | blankets |
Token ID | 34794 |
Feature activation | +3.09e-02 |
Pos logit contributions | |
nicely | +0.058 |
fair | +0.058 |
uddle | +0.057 |
favorite | +0.056 |
cart | +0.055 |
Neg logit contributions | |
leaf | -0.091 |
outage | -0.087 |
keeping | -0.085 |
Judy | -0.085 |
osh | -0.084 |
Token | animals |
Token ID | 4695 |
Feature activation | +3.24e-02 |
Pos logit contributions | |
keeping | +0.140 |
outage | +0.133 |
fly | +0.131 |
leaf | +0.126 |
Judy | +0.126 |
Neg logit contributions | |
fair | -0.068 |
nicely | -0.066 |
aws | -0.063 |
uddle | -0.063 |
favorite | -0.060 |
Token | like |
Token ID | 588 |
Feature activation | +2.24e-02 |
Pos logit contributions | |
keeping | +0.068 |
outage | +0.064 |
leaf | +0.063 |
fright | +0.062 |
harm | +0.062 |
Neg logit contributions | |
nicely | -0.038 |
fair | -0.037 |
uddle | -0.036 |
aws | -0.035 |
favorite | -0.034 |
Token | lions |
Token ID | 31420 |
Feature activation | +5.77e-03 |
Pos logit contributions | |
keeping | +0.045 |
outage | +0.043 |
leaf | +0.042 |
harm | +0.041 |
fright | +0.041 |
Neg logit contributions | |
fair | -0.029 |
nicely | -0.028 |
aws | -0.027 |
favorite | -0.026 |
uddle | -0.026 |
Token | , |
Token ID | 11 |
Feature activation | -1.52e-01 |
Pos logit contributions | |
keeping | +0.012 |
outage | +0.012 |
leaf | +0.012 |
fly | +0.012 |
harm | +0.012 |
Neg logit contributions | |
nicely | -0.006 |
fair | -0.006 |
uddle | -0.006 |
aws | -0.006 |
favorite | -0.006 |
Token | ze |
Token ID | 41271 |
Feature activation | +3.99e-02 |
Pos logit contributions | |
fair | +0.211 |
nicely | +0.208 |
aws | +0.203 |
uddle | +0.196 |
favorite | +0.196 |
Neg logit contributions | |
keeping | -0.291 |
outage | -0.273 |
leaf | -0.265 |
harm | -0.259 |
fright | -0.254 |
Token | br |
Token ID | 1671 |
Feature activation | -1.82e-01 |
Pos logit contributions | |
keeping | +0.084 |
leaf | +0.082 |
outage | +0.081 |
fright | +0.079 |
fly | +0.079 |
Neg logit contributions | |
nicely | -0.044 |
fair | -0.042 |
uddle | -0.041 |
favorite | -0.040 |
aws | -0.039 |
Token | as |
Token ID | 292 |
Feature activation | +1.07e-01 |
Pos logit contributions | |
nicely | +0.303 |
fair | +0.298 |
uddle | +0.293 |
aws | +0.284 |
favorite | +0.280 |
Neg logit contributions | |
keeping | -0.290 |
outage | -0.266 |
leaf | -0.265 |
fly | -0.258 |
fright | -0.256 |
Token | , |
Token ID | 11 |
Feature activation | -1.52e-01 |
Pos logit contributions | |
keeping | +0.239 |
outage | +0.226 |
leaf | +0.225 |
fright | +0.221 |
harm | +0.220 |
Neg logit contributions | |
nicely | -0.111 |
fair | -0.107 |
uddle | -0.105 |
aws | -0.104 |
favorite | -0.098 |
Token | and |
Token ID | 290 |
Feature activation | +3.96e-02 |
Pos logit contributions | |
nicely | +0.181 |
fair | +0.179 |
uddle | +0.172 |
aws | +0.171 |
favorite | +0.165 |
Neg logit contributions | |
keeping | -0.316 |
outage | -0.298 |
leaf | -0.294 |
harm | -0.287 |
fright | -0.287 |
Token | gir |
Token ID | 37370 |
Feature activation | -1.28e-02 |
Pos logit contributions | |
keeping | +0.077 |
outage | +0.072 |
leaf | +0.070 |
harm | +0.068 |
fright | +0.068 |
Neg logit contributions | |
fair | -0.054 |
nicely | -0.053 |
aws | -0.051 |
favorite | -0.050 |
uddle | -0.049 |
Token | aff |
Token ID | 2001 |
Feature activation | +3.38e-02 |
Pos logit contributions | |
nicely | +0.019 |
fair | +0.018 |
uddle | +0.018 |
favorite | +0.017 |
cart | +0.017 |
Neg logit contributions | |
keeping | -0.023 |
leaf | -0.022 |
fly | -0.021 |
harm | -0.021 |
outage | -0.021 |
Token | , |
Token ID | 11 |
Feature activation | -1.54e-01 |
Pos logit contributions | |
keeping | +0.008 |
outage | +0.008 |
leaf | +0.008 |
fright | +0.008 |
fly | +0.008 |
Neg logit contributions | |
nicely | -0.004 |
fair | -0.004 |
uddle | -0.004 |
aws | -0.003 |
favorite | -0.003 |
Token | monkeys |
Token ID | 26697 |
Feature activation | +2.95e-02 |
Pos logit contributions | |
fair | +0.203 |
nicely | +0.200 |
aws | +0.193 |
favorite | +0.186 |
uddle | +0.183 |
Neg logit contributions | |
keeping | -0.308 |
outage | -0.290 |
leaf | -0.283 |
harm | -0.275 |
rum | -0.270 |
Token | , |
Token ID | 11 |
Feature activation | -1.54e-01 |
Pos logit contributions | |
keeping | +0.067 |
fly | +0.064 |
outage | +0.064 |
leaf | +0.063 |
harm | +0.063 |
Neg logit contributions | |
nicely | -0.028 |
fair | -0.027 |
uddle | -0.027 |
aws | -0.025 |
favorite | -0.025 |
Token | and |
Token ID | 290 |
Feature activation | +3.93e-02 |
Pos logit contributions | |
fair | +0.175 |
nicely | +0.171 |
aws | +0.165 |
favorite | +0.159 |
cart | +0.156 |
Neg logit contributions | |
keeping | -0.335 |
outage | -0.320 |
leaf | -0.311 |
harm | -0.303 |
fright | -0.299 |
Token | ze |
Token ID | 41271 |
Feature activation | +3.87e-02 |
Pos logit contributions | |
keeping | +0.080 |
outage | +0.076 |
leaf | +0.074 |
harm | +0.071 |
fly | +0.071 |
Neg logit contributions | |
fair | -0.050 |
nicely | -0.049 |
aws | -0.046 |
favorite | -0.046 |
nice | -0.044 |
Token | br |
Token ID | 1671 |
Feature activation | -1.82e-01 |
Pos logit contributions | |
keeping | +0.079 |
leaf | +0.078 |
outage | +0.076 |
fright | +0.075 |
harm | +0.074 |
Neg logit contributions | |
nicely | -0.045 |
fair | -0.043 |
uddle | -0.042 |
favorite | -0.041 |
aws | -0.040 |
Token | as |
Token ID | 292 |
Feature activation | +1.06e-01 |
Pos logit contributions | |
nicely | +0.313 |
fair | +0.309 |
uddle | +0.302 |
cart | +0.291 |
favorite | +0.289 |
Neg logit contributions | |
keeping | -0.275 |
outage | -0.252 |
leaf | -0.251 |
fly | -0.248 |
Judy | -0.243 |
Token | . |
Token ID | 13 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
keeping | +0.224 |
leaf | +0.215 |
outage | +0.213 |
fright | +0.211 |
harm | +0.208 |
Neg logit contributions | |
nicely | -0.124 |
fair | -0.117 |
aws | -0.116 |
uddle | -0.113 |
cart | -0.108 |
Token | But |
Token ID | 887 |
Feature activation | +6.67e-02 |
Pos logit contributions | |
keeping | +0.020 |
Judy | +0.018 |
leaf | +0.018 |
outage | +0.018 |
fright | +0.018 |
Neg logit contributions | |
nicely | -0.012 |
fair | -0.012 |
uddle | -0.012 |
aws | -0.012 |
favorite | -0.011 |
Token | the |
Token ID | 262 |
Feature activation | -2.25e-02 |
Pos logit contributions | |
keeping | +0.129 |
fly | +0.120 |
Judy | +0.120 |
Maria | +0.118 |
harm | +0.118 |
Neg logit contributions | |
uddle | -0.088 |
aws | -0.084 |
nicely | -0.083 |
fair | -0.081 |
udd | -0.077 |
Token | one |
Token ID | 530 |
Feature activation | -1.04e-01 |
Pos logit contributions | |
fair | +0.027 |
nicely | +0.026 |
favorite | +0.025 |
aws | +0.025 |
uddle | +0.025 |
Neg logit contributions | |
keeping | -0.049 |
outage | -0.046 |
leaf | -0.045 |
rum | -0.044 |
Judy | -0.044 |
Token | wanted |
Token ID | 2227 |
Feature activation | +3.64e-02 |
Pos logit contributions | |
keeping | +0.073 |
outage | +0.066 |
leaf | +0.065 |
ull | +0.063 |
eway | +0.063 |
Neg logit contributions | |
nicely | -0.066 |
fair | -0.065 |
uddle | -0.062 |
aws | -0.062 |
favorite | -0.061 |
Token | to |
Token ID | 284 |
Feature activation | +8.59e-02 |
Pos logit contributions | |
keeping | +0.081 |
outage | +0.077 |
leaf | +0.076 |
fly | +0.074 |
fright | +0.074 |
Neg logit contributions | |
nicely | -0.039 |
fair | -0.039 |
aws | -0.036 |
favorite | -0.036 |
uddle | -0.035 |
Token | see |
Token ID | 766 |
Feature activation | -2.31e-02 |
Pos logit contributions | |
keeping | +0.167 |
outage | +0.154 |
leaf | +0.154 |
fly | +0.147 |
Judy | +0.146 |
Neg logit contributions | |
nicely | -0.119 |
fair | -0.117 |
uddle | -0.111 |
aws | -0.110 |
favorite | -0.110 |
Token | the |
Token ID | 262 |
Feature activation | -2.10e-02 |
Pos logit contributions | |
nicely | +0.031 |
fair | +0.030 |
aws | +0.029 |
favorite | +0.028 |
uddle | +0.028 |
Neg logit contributions | |
keeping | -0.047 |
outage | -0.044 |
leaf | -0.043 |
fright | -0.041 |
fly | -0.041 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
fair | +0.026 |
favorite | +0.025 |
aws | +0.024 |
nice | +0.023 |
uddle | +0.023 |
Neg logit contributions | |
keeping | -0.049 |
outage | -0.044 |
rum | -0.044 |
leaf | -0.044 |
Judy | -0.043 |
Token | br |
Token ID | 1671 |
Feature activation | -1.81e-01 |
Pos logit contributions | |
keeping | +0.090 |
leaf | +0.087 |
outage | +0.086 |
fright | +0.084 |
fly | +0.084 |
Neg logit contributions | |
nicely | -0.043 |
fair | -0.042 |
uddle | -0.041 |
aws | -0.040 |
favorite | -0.039 |
Token | as |
Token ID | 292 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
nicely | +0.301 |
fair | +0.295 |
uddle | +0.292 |
aws | +0.288 |
favorite | +0.281 |
Neg logit contributions | |
keeping | -0.291 |
outage | -0.268 |
leaf | -0.266 |
fright | -0.259 |
harm | -0.258 |
Token | the |
Token ID | 262 |
Feature activation | -2.21e-02 |
Pos logit contributions | |
keeping | +0.249 |
leaf | +0.235 |
outage | +0.235 |
fright | +0.229 |
harm | +0.228 |
Neg logit contributions | |
nicely | -0.113 |
fair | -0.108 |
aws | -0.103 |
uddle | -0.103 |
favorite | -0.097 |
Token | most |
Token ID | 749 |
Feature activation | +5.11e-02 |
Pos logit contributions | |
fair | +0.028 |
nicely | +0.026 |
favorite | +0.025 |
best | +0.025 |
aws | +0.025 |
Neg logit contributions | |
keeping | -0.047 |
outage | -0.047 |
leaf | -0.046 |
rum | -0.045 |
Judy | -0.044 |
Token | . |
Token ID | 13 |
Feature activation | +5.60e-03 |
Pos logit contributions | |
keeping | +0.112 |
leaf | +0.101 |
outage | +0.101 |
Judy | +0.100 |
fly | +0.098 |
Neg logit contributions | |
fair | -0.063 |
nicely | -0.061 |
favorite | -0.058 |
uddle | -0.057 |
nice | -0.056 |
Token | They |
Token ID | 1119 |
Feature activation | -3.50e-02 |
Pos logit contributions | |
keeping | +0.011 |
Judy | +0.010 |
outage | +0.010 |
fright | +0.010 |
leaf | +0.010 |
Neg logit contributions | |
nicely | -0.007 |
fair | -0.007 |
uddle | -0.007 |
aws | -0.007 |
favorite | -0.007 |
Token | es |
Token ID | 274 |
Feature activation | +1.50e-02 |
Pos logit contributions | |
keeping | +0.051 |
outage | +0.047 |
leaf | +0.046 |
fright | +0.045 |
harm | +0.044 |
Neg logit contributions | |
nicely | -0.064 |
fair | -0.062 |
aws | -0.062 |
uddle | -0.061 |
favorite | -0.059 |
Token | , |
Token ID | 11 |
Feature activation | -1.52e-01 |
Pos logit contributions | |
keeping | +0.032 |
fly | +0.030 |
outage | +0.030 |
leaf | +0.030 |
harm | +0.029 |
Neg logit contributions | |
nicely | -0.016 |
uddle | -0.016 |
fair | -0.016 |
aws | -0.015 |
favorite | -0.015 |
Token | and |
Token ID | 290 |
Feature activation | +3.81e-02 |
Pos logit contributions | |
uddle | +0.239 |
nicely | +0.233 |
fair | +0.211 |
aws | +0.209 |
cart | +0.202 |
Neg logit contributions | |
keeping | -0.250 |
fly | -0.248 |
Judy | -0.239 |
fright | -0.234 |
leaf | -0.230 |
Token | fast |
Token ID | 3049 |
Feature activation | +2.76e-02 |
Pos logit contributions | |
keeping | +0.075 |
outage | +0.070 |
leaf | +0.069 |
fright | +0.068 |
harm | +0.067 |
Neg logit contributions | |
nicely | -0.050 |
fair | -0.049 |
uddle | -0.047 |
aws | -0.047 |
favorite | -0.046 |
Token | ze |
Token ID | 41271 |
Feature activation | +3.90e-02 |
Pos logit contributions | |
keeping | +0.053 |
outage | +0.048 |
harm | +0.048 |
fright | +0.048 |
leaf | +0.048 |
Neg logit contributions | |
nicely | -0.037 |
uddle | -0.036 |
fair | -0.035 |
aws | -0.034 |
favorite | -0.033 |
Token | br |
Token ID | 1671 |
Feature activation | -1.81e-01 |
Pos logit contributions | |
keeping | +0.079 |
leaf | +0.079 |
outage | +0.076 |
fright | +0.075 |
harm | +0.075 |
Neg logit contributions | |
nicely | -0.045 |
fair | -0.043 |
uddle | -0.043 |
favorite | -0.042 |
aws | -0.039 |
Token | as |
Token ID | 292 |
Feature activation | +1.07e-01 |
Pos logit contributions | |
nicely | +0.307 |
fair | +0.303 |
uddle | +0.299 |
aws | +0.286 |
cart | +0.286 |
Neg logit contributions | |
keeping | -0.279 |
outage | -0.257 |
leaf | -0.255 |
fly | -0.251 |
Judy | -0.246 |
Token | . |
Token ID | 13 |
Feature activation | +1.22e-02 |
Pos logit contributions | |
keeping | +0.226 |
leaf | +0.216 |
outage | +0.215 |
fright | +0.212 |
harm | +0.208 |
Neg logit contributions | |
nicely | -0.125 |
fair | -0.119 |
aws | -0.117 |
uddle | -0.114 |
favorite | -0.109 |
Token | Tim |
Token ID | 5045 |
Feature activation | +4.49e-02 |
Pos logit contributions | |
keeping | +0.024 |
outage | +0.023 |
leaf | +0.022 |
fright | +0.022 |
fly | +0.022 |
Neg logit contributions | |
nicely | -0.015 |
fair | -0.015 |
uddle | -0.015 |
aws | -0.015 |
favorite | -0.014 |
Token | was |
Token ID | 373 |
Feature activation | +9.33e-04 |
Pos logit contributions | |
keeping | +0.080 |
outage | +0.074 |
fly | +0.073 |
leaf | +0.073 |
harm | +0.073 |
Neg logit contributions | |
nicely | -0.065 |
uddle | -0.063 |
fair | -0.063 |
aws | -0.061 |
favorite | -0.059 |
Token | very |
Token ID | 845 |
Feature activation | -5.74e-03 |
Pos logit contributions | |
keeping | +0.002 |
leaf | +0.002 |
Judy | +0.002 |
fright | +0.002 |
fly | +0.002 |
Neg logit contributions | |
uddle | -0.001 |
nicely | -0.001 |
aws | -0.001 |
cart | -0.001 |
fair | -0.001 |
Token | gir |
Token ID | 37370 |
Feature activation | -8.39e-03 |
Pos logit contributions | |
nicely | +0.174 |
fair | +0.171 |
uddle | +0.165 |
aws | +0.164 |
favorite | +0.158 |
Neg logit contributions | |
keeping | -0.305 |
outage | -0.287 |
leaf | -0.284 |
harm | -0.278 |
fright | -0.277 |
Token | aff |
Token ID | 2001 |
Feature activation | +3.69e-02 |
Pos logit contributions | |
nicely | +0.013 |
uddle | +0.012 |
fair | +0.012 |
favorite | +0.012 |
cooler | +0.012 |
Neg logit contributions | |
keeping | -0.013 |
leaf | -0.013 |
harm | -0.013 |
fly | -0.013 |
jo | -0.013 |
Token | es |
Token ID | 274 |
Feature activation | +1.72e-02 |
Pos logit contributions | |
keeping | +0.054 |
outage | +0.050 |
leaf | +0.049 |
fright | +0.048 |
harm | +0.047 |
Neg logit contributions | |
nicely | -0.067 |
fair | -0.065 |
aws | -0.065 |
uddle | -0.064 |
favorite | -0.062 |
Token | and |
Token ID | 290 |
Feature activation | +4.41e-02 |
Pos logit contributions | |
keeping | +0.039 |
outage | +0.037 |
leaf | +0.036 |
fly | +0.036 |
fright | +0.036 |
Neg logit contributions | |
nicely | -0.017 |
uddle | -0.016 |
fair | -0.016 |
aws | -0.015 |
favorite | -0.015 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.02e-02 |
Pos logit contributions | |
keeping | +0.093 |
outage | +0.089 |
leaf | +0.085 |
harm | +0.083 |
rum | +0.083 |
Neg logit contributions | |
fair | -0.053 |
nicely | -0.051 |
aws | -0.049 |
favorite | -0.049 |
sandbox | -0.048 |
Token | br |
Token ID | 1671 |
Feature activation | -1.81e-01 |
Pos logit contributions | |
keeping | +0.083 |
leaf | +0.081 |
outage | +0.080 |
fright | +0.078 |
harm | +0.078 |
Neg logit contributions | |
nicely | -0.046 |
fair | -0.044 |
uddle | -0.043 |
favorite | -0.042 |
aws | -0.041 |
Token | as |
Token ID | 292 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
nicely | +0.309 |
fair | +0.305 |
uddle | +0.300 |
cart | +0.287 |
aws | +0.286 |
Neg logit contributions | |
keeping | -0.276 |
outage | -0.253 |
leaf | -0.252 |
fly | -0.248 |
Judy | -0.243 |
Token | . |
Token ID | 13 |
Feature activation | +1.64e-02 |
Pos logit contributions | |
keeping | +0.235 |
leaf | +0.227 |
outage | +0.225 |
fright | +0.222 |
Judy | +0.219 |
Neg logit contributions | |
nicely | -0.121 |
fair | -0.114 |
aws | -0.114 |
uddle | -0.108 |
cart | -0.104 |
Token | They |
Token ID | 1119 |
Feature activation | -3.36e-02 |
Pos logit contributions | |
keeping | +0.032 |
outage | +0.029 |
leaf | +0.029 |
Judy | +0.029 |
fright | +0.029 |
Neg logit contributions | |
nicely | -0.022 |
fair | -0.021 |
uddle | -0.021 |
aws | -0.020 |
favorite | -0.020 |
Token | took |
Token ID | 1718 |
Feature activation | -7.73e-03 |
Pos logit contributions | |
fair | +0.047 |
nicely | +0.046 |
aws | +0.045 |
favorite | +0.043 |
sandbox | +0.042 |
Neg logit contributions | |
keeping | -0.067 |
outage | -0.062 |
leaf | -0.059 |
harm | -0.059 |
eway | -0.058 |
Token | many |
Token ID | 867 |
Feature activation | +4.63e-03 |
Pos logit contributions | |
nicely | +0.011 |
fair | +0.011 |
aws | +0.010 |
uddle | +0.010 |
favorite | +0.010 |
Neg logit contributions | |
keeping | -0.015 |
outage | -0.014 |
leaf | -0.013 |
harm | -0.013 |
fly | -0.013 |
Token | scree |
Token ID | 43465 |
Feature activation | +3.65e-02 |
Pos logit contributions | |
keeping | +0.077 |
outage | +0.070 |
leaf | +0.068 |
Judy | +0.065 |
eway | +0.064 |
Neg logit contributions | |
nicely | -0.048 |
fair | -0.047 |
aws | -0.044 |
uddle | -0.043 |
nice | -0.042 |
Token | ched |
Token ID | 1740 |
Feature activation | -2.94e-02 |
Pos logit contributions | |
keeping | +0.060 |
outage | +0.053 |
Judy | +0.050 |
leaf | +0.050 |
Maria | +0.049 |
Neg logit contributions | |
fair | -0.064 |
nicely | -0.064 |
aws | -0.063 |
uddle | -0.061 |
nice | -0.059 |
Token | . |
Token ID | 13 |
Feature activation | +1.19e-02 |
Pos logit contributions | |
nicely | +0.037 |
fair | +0.036 |
aws | +0.033 |
nice | +0.032 |
cart | +0.032 |
Neg logit contributions | |
outage | -0.063 |
keeping | -0.063 |
leaf | -0.059 |
harm | -0.056 |
fly | -0.055 |
Token | The |
Token ID | 383 |
Feature activation | +3.81e-02 |
Pos logit contributions | |
keeping | +0.026 |
outage | +0.025 |
harm | +0.025 |
leaf | +0.025 |
fright | +0.024 |
Neg logit contributions | |
nicely | -0.014 |
aws | -0.013 |
fair | -0.012 |
gym | -0.012 |
cart | -0.011 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.09e-02 |
Pos logit contributions | |
keeping | +0.083 |
outage | +0.077 |
rum | +0.074 |
leaf | +0.074 |
eway | +0.071 |
Neg logit contributions | |
fair | -0.052 |
nicely | -0.048 |
cart | -0.046 |
nice | -0.046 |
favorite | -0.046 |
Token | br |
Token ID | 1671 |
Feature activation | -1.80e-01 |
Pos logit contributions | |
keeping | +0.091 |
outage | +0.084 |
leaf | +0.082 |
fright | +0.081 |
harm | +0.081 |
Neg logit contributions | |
nicely | -0.045 |
fair | -0.044 |
aws | -0.043 |
uddle | -0.043 |
favorite | -0.039 |
Token | as |
Token ID | 292 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
nicely | +0.257 |
aws | +0.257 |
uddle | +0.250 |
fair | +0.247 |
favorite | +0.242 |
Neg logit contributions | |
keeping | -0.343 |
leaf | -0.311 |
outage | -0.309 |
harm | -0.307 |
fright | -0.306 |
Token | ne |
Token ID | 497 |
Feature activation | -8.88e-02 |
Pos logit contributions | |
keeping | +0.227 |
outage | +0.212 |
leaf | +0.208 |
Judy | +0.199 |
icated | +0.196 |
Neg logit contributions | |
nicely | -0.141 |
fair | -0.136 |
aws | -0.128 |
uddle | -0.127 |
cart | -0.125 |
Token | ighed |
Token ID | 12570 |
Feature activation | +6.15e-02 |
Pos logit contributions | |
nicely | +0.161 |
fair | +0.158 |
uddle | +0.156 |
aws | +0.153 |
favorite | +0.151 |
Neg logit contributions | |
keeping | -0.129 |
outage | -0.118 |
leaf | -0.116 |
fly | -0.115 |
fright | -0.114 |
Token | . |
Token ID | 13 |
Feature activation | +1.11e-02 |
Pos logit contributions | |
keeping | +0.129 |
outage | +0.121 |
leaf | +0.121 |
fright | +0.119 |
harm | +0.118 |
Neg logit contributions | |
nicely | -0.071 |
fair | -0.069 |
uddle | -0.068 |
aws | -0.067 |
favorite | -0.064 |
Token | The |
Token ID | 383 |
Feature activation | +3.60e-02 |
Pos logit contributions | |
keeping | +0.024 |
outage | +0.023 |
harm | +0.023 |
leaf | +0.023 |
fright | +0.022 |
Neg logit contributions | |
nicely | -0.013 |
aws | -0.012 |
fair | -0.012 |
gym | -0.011 |
uddle | -0.011 |
Token | can |
Token ID | 460 |
Feature activation | +6.28e-02 |
Pos logit contributions | |
keeping | +0.074 |
leaf | +0.066 |
eway | +0.065 |
outage | +0.064 |
jo | +0.063 |
Neg logit contributions | |
fair | -0.050 |
nicely | -0.050 |
uddle | -0.046 |
favorite | -0.045 |
nice | -0.045 |
Token | 't |
Token ID | 470 |
Feature activation | +1.25e-01 |
Pos logit contributions | |
keeping | +0.100 |
outage | +0.092 |
leaf | +0.091 |
fright | +0.088 |
harm | +0.088 |
Neg logit contributions | |
nicely | -0.106 |
fair | -0.104 |
uddle | -0.103 |
aws | -0.101 |
favorite | -0.099 |
Token | . |
Token ID | 13 |
Feature activation | -1.66e-03 |
Pos logit contributions | |
keeping | +0.231 |
leaf | +0.215 |
outage | +0.207 |
eway | +0.199 |
jo | +0.198 |
Neg logit contributions | |
nicely | -0.199 |
fair | -0.194 |
uddle | -0.182 |
favorite | -0.177 |
nice | -0.177 |
Token | The |
Token ID | 383 |
Feature activation | +3.58e-02 |
Pos logit contributions | |
nicely | +0.003 |
favorite | +0.003 |
fair | +0.002 |
uddle | +0.002 |
aws | +0.002 |
Neg logit contributions | |
keeping | -0.003 |
leaf | -0.003 |
harm | -0.003 |
outage | -0.003 |
eway | -0.003 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.42e-02 |
Pos logit contributions | |
keeping | +0.078 |
leaf | +0.073 |
outage | +0.072 |
jo | +0.069 |
Judy | +0.069 |
Neg logit contributions | |
fair | -0.043 |
nicely | -0.043 |
uddle | -0.040 |
favorite | -0.040 |
cart | -0.039 |
Token | br |
Token ID | 1671 |
Feature activation | -1.80e-01 |
Pos logit contributions | |
keeping | +0.114 |
outage | +0.103 |
Judy | +0.100 |
fright | +0.100 |
fly | +0.100 |
Neg logit contributions | |
nicely | -0.036 |
fair | -0.036 |
uddle | -0.035 |
aws | -0.034 |
nice | -0.030 |
Token | as |
Token ID | 292 |
Feature activation | +1.15e-01 |
Pos logit contributions | |
nicely | +0.294 |
fair | +0.286 |
uddle | +0.284 |
aws | +0.283 |
favorite | +0.275 |
Neg logit contributions | |
keeping | -0.301 |
outage | -0.274 |
leaf | -0.274 |
fright | -0.268 |
harm | -0.267 |
Token | have |
Token ID | 423 |
Feature activation | +1.80e-02 |
Pos logit contributions | |
keeping | +0.229 |
leaf | +0.211 |
Judy | +0.204 |
outage | +0.204 |
eway | +0.203 |
Neg logit contributions | |
nicely | -0.165 |
fair | -0.160 |
uddle | -0.150 |
nice | -0.144 |
cooler | -0.143 |
Token | their |
Token ID | 511 |
Feature activation | -7.23e-02 |
Pos logit contributions | |
keeping | +0.038 |
outage | +0.035 |
leaf | +0.035 |
Judy | +0.034 |
rated | +0.034 |
Neg logit contributions | |
fair | -0.024 |
nicely | -0.022 |
nice | -0.021 |
favorite | -0.021 |
aws | -0.021 |
Token | own |
Token ID | 898 |
Feature activation | +6.45e-02 |
Pos logit contributions | |
favorite | +0.101 |
fair | +0.094 |
nicely | +0.090 |
nice | +0.085 |
uddle | +0.084 |
Neg logit contributions | |
keeping | -0.151 |
outage | -0.146 |
leaf | -0.144 |
ull | -0.137 |
Judy | -0.137 |
Token | food |
Token ID | 2057 |
Feature activation | -5.46e-02 |
Pos logit contributions | |
leaf | +0.124 |
keeping | +0.124 |
outage | +0.120 |
Judy | +0.117 |
Warning | +0.117 |
Neg logit contributions | |
fair | -0.087 |
favorite | -0.086 |
nicely | -0.085 |
uddle | -0.081 |
nice | -0.080 |
Token | wand |
Token ID | 11569 |
Feature activation | +1.37e-02 |
Pos logit contributions | |
favorite | +0.088 |
fair | +0.083 |
nicely | +0.083 |
uddle | +0.080 |
cart | +0.077 |
Neg logit contributions | |
keeping | -0.125 |
outage | -0.119 |
leaf | -0.118 |
Warning | -0.115 |
Judy | -0.115 |
Token | and |
Token ID | 290 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
keeping | +0.032 |
leaf | +0.031 |
outage | +0.030 |
harm | +0.029 |
fright | +0.029 |
Neg logit contributions | |
nicely | -0.014 |
fair | -0.013 |
favorite | -0.012 |
uddle | -0.012 |
aws | -0.012 |
Token | say |
Token ID | 910 |
Feature activation | +5.56e-02 |
Pos logit contributions | |
keeping | +0.077 |
leaf | +0.073 |
outage | +0.072 |
rum | +0.071 |
ull | +0.069 |
Neg logit contributions | |
nicely | -0.063 |
fair | -0.062 |
favorite | -0.056 |
nice | -0.055 |
sandbox | -0.055 |
Token | " |
Token ID | 366 |
Feature activation | -6.64e-02 |
Pos logit contributions | |
keeping | +0.108 |
outage | +0.104 |
fright | +0.104 |
Judy | +0.103 |
leaf | +0.103 |
Neg logit contributions | |
uddle | -0.070 |
aws | -0.066 |
nicely | -0.065 |
fair | -0.062 |
favorite | -0.061 |
Token | A |
Token ID | 32 |
Feature activation | +8.52e-02 |
Pos logit contributions | |
fair | +0.099 |
nicely | +0.094 |
aws | +0.092 |
hockey | +0.086 |
nice | +0.086 |
Neg logit contributions | |
outage | -0.124 |
keeping | -0.117 |
Judy | -0.110 |
rum | -0.110 |
Maria | -0.108 |
Token | br |
Token ID | 1671 |
Feature activation | -1.80e-01 |
Pos logit contributions | |
keeping | +0.207 |
Judy | +0.176 |
leaf | +0.175 |
outage | +0.169 |
Maria | +0.169 |
Neg logit contributions | |
fair | -0.101 |
nicely | -0.095 |
nice | -0.092 |
uddle | -0.089 |
hockey | -0.084 |
Token | ac |
Token ID | 330 |
Feature activation | +1.04e-01 |
Pos logit contributions | |
nicely | +0.340 |
fair | +0.330 |
uddle | +0.329 |
aws | +0.326 |
favorite | +0.320 |
Neg logit contributions | |
keeping | -0.262 |
leaf | -0.231 |
outage | -0.229 |
harm | -0.225 |
fright | -0.224 |
Token | ad |
Token ID | 324 |
Feature activation | -1.78e-05 |
Pos logit contributions | |
leaf | +0.148 |
outage | +0.147 |
keeping | +0.142 |
Maria | +0.131 |
Judy | +0.124 |
Neg logit contributions | |
fair | -0.201 |
aws | -0.185 |
nicely | -0.183 |
nice | -0.181 |
best | -0.173 |
Token | ab |
Token ID | 397 |
Feature activation | +4.13e-02 |
Pos logit contributions | |
nicely | +0.000 |
fair | +0.000 |
nice | +0.000 |
uddle | +0.000 |
aws | +0.000 |
Neg logit contributions | |
keeping | -0.000 |
fright | -0.000 |
leaf | -0.000 |
harm | -0.000 |
outage | -0.000 |
Token | ra |
Token ID | 430 |
Feature activation | +1.53e-01 |
Pos logit contributions | |
keeping | +0.055 |
outage | +0.053 |
leaf | +0.052 |
harm | +0.050 |
icated | +0.047 |
Neg logit contributions | |
nicely | -0.082 |
fair | -0.080 |
favorite | -0.078 |
uddle | -0.078 |
aws | -0.077 |
Token | !" |
Token ID | 2474 |
Feature activation | +4.94e-02 |
Pos logit contributions | |
keeping | +0.251 |
outage | +0.228 |
leaf | +0.227 |
fright | +0.216 |
harm | +0.215 |
Neg logit contributions | |
nicely | -0.263 |
fair | -0.255 |
aws | -0.243 |
uddle | -0.240 |
nice | -0.235 |
Token | and |
Token ID | 290 |
Feature activation | +3.98e-02 |
Pos logit contributions | |
keeping | +0.042 |
outage | +0.040 |
fly | +0.039 |
leaf | +0.038 |
ull | +0.038 |
Neg logit contributions | |
uddle | -0.029 |
nicely | -0.028 |
fair | -0.028 |
aws | -0.027 |
favorite | -0.026 |
Token | looked |
Token ID | 3114 |
Feature activation | -9.64e-02 |
Pos logit contributions | |
keeping | +0.075 |
outage | +0.074 |
leaf | +0.069 |
eway | +0.069 |
harm | +0.068 |
Neg logit contributions | |
fair | -0.056 |
nicely | -0.056 |
favorite | -0.053 |
cart | -0.052 |
aws | -0.050 |
Token | for |
Token ID | 329 |
Feature activation | +5.47e-02 |
Pos logit contributions | |
uddle | +0.084 |
nicely | +0.083 |
aws | +0.081 |
fair | +0.079 |
gym | +0.073 |
Neg logit contributions | |
keeping | -0.220 |
leaf | -0.214 |
fright | -0.211 |
outage | -0.209 |
Judy | -0.208 |
Token | the |
Token ID | 262 |
Feature activation | -2.37e-02 |
Pos logit contributions | |
keeping | +0.109 |
outage | +0.104 |
leaf | +0.100 |
rated | +0.098 |
fly | +0.097 |
Neg logit contributions | |
fair | -0.075 |
nicely | -0.073 |
aws | -0.070 |
favorite | -0.069 |
nice | -0.068 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.12e-02 |
Pos logit contributions | |
fair | +0.028 |
favorite | +0.026 |
nice | +0.025 |
best | +0.024 |
nicely | +0.024 |
Neg logit contributions | |
keeping | -0.056 |
outage | -0.052 |
rum | -0.051 |
leaf | -0.050 |
eway | -0.050 |
Token | br |
Token ID | 1671 |
Feature activation | -1.80e-01 |
Pos logit contributions | |
keeping | +0.094 |
leaf | +0.090 |
outage | +0.090 |
fright | +0.087 |
harm | +0.087 |
Neg logit contributions | |
nicely | -0.040 |
fair | -0.038 |
uddle | -0.037 |
aws | -0.036 |
favorite | -0.035 |
Token | as |
Token ID | 292 |
Feature activation | +1.11e-01 |
Pos logit contributions | |
nicely | +0.300 |
fair | +0.296 |
uddle | +0.292 |
aws | +0.284 |
favorite | +0.279 |
Neg logit contributions | |
keeping | -0.284 |
outage | -0.263 |
leaf | -0.260 |
fly | -0.255 |
fright | -0.252 |
Token | . |
Token ID | 13 |
Feature activation | +6.24e-03 |
Pos logit contributions | |
keeping | +0.241 |
leaf | +0.236 |
fright | +0.231 |
outage | +0.231 |
harm | +0.226 |
Neg logit contributions | |
nicely | -0.121 |
fair | -0.111 |
aws | -0.109 |
cart | -0.102 |
favorite | -0.101 |
Token | They |
Token ID | 1119 |
Feature activation | -3.82e-02 |
Pos logit contributions | |
keeping | +0.012 |
outage | +0.011 |
Judy | +0.011 |
leaf | +0.011 |
fright | +0.011 |
Neg logit contributions | |
uddle | -0.008 |
fair | -0.008 |
nicely | -0.008 |
aws | -0.008 |
favorite | -0.007 |
Token | saw |
Token ID | 2497 |
Feature activation | -1.88e-02 |
Pos logit contributions | |
nicely | +0.052 |
fair | +0.051 |
uddle | +0.051 |
aws | +0.049 |
favorite | +0.048 |
Neg logit contributions | |
keeping | -0.072 |
outage | -0.068 |
leaf | -0.067 |
fright | -0.066 |
harm | -0.066 |
Token | one |
Token ID | 530 |
Feature activation | -1.03e-01 |
Pos logit contributions | |
nicely | +0.025 |
uddle | +0.024 |
fair | +0.024 |
aws | +0.023 |
favorite | +0.023 |
Neg logit contributions | |
keeping | -0.036 |
outage | -0.034 |
leaf | -0.034 |
fright | -0.033 |
fly | -0.033 |
Token | not |
Token ID | 407 |
Feature activation | +9.70e-02 |
Pos logit contributions | |
keeping | +0.108 |
fright | +0.102 |
outage | +0.102 |
harm | +0.101 |
leaf | +0.100 |
Neg logit contributions | |
uddle | -0.071 |
nicely | -0.070 |
fair | -0.069 |
aws | -0.069 |
cart | -0.065 |
Token | see |
Token ID | 766 |
Feature activation | -2.46e-02 |
Pos logit contributions | |
keeping | +0.156 |
outage | +0.141 |
leaf | +0.140 |
fly | +0.135 |
eway | +0.132 |
Neg logit contributions | |
nicely | -0.169 |
fair | -0.169 |
aws | -0.156 |
nice | -0.155 |
favorite | -0.154 |
Token | any |
Token ID | 597 |
Feature activation | -5.36e-02 |
Pos logit contributions | |
fair | +0.030 |
nicely | +0.030 |
favorite | +0.028 |
aws | +0.028 |
nice | +0.026 |
Neg logit contributions | |
keeping | -0.055 |
outage | -0.052 |
leaf | -0.051 |
rated | -0.048 |
eway | -0.048 |
Token | baby |
Token ID | 5156 |
Feature activation | +7.34e-02 |
Pos logit contributions | |
fair | +0.061 |
aws | +0.057 |
nicely | +0.056 |
uddle | +0.054 |
nice | +0.054 |
Neg logit contributions | |
keeping | -0.125 |
outage | -0.111 |
leaf | -0.111 |
Judy | -0.109 |
Maria | -0.108 |
Token | ze |
Token ID | 41271 |
Feature activation | +3.97e-02 |
Pos logit contributions | |
keeping | +0.134 |
outage | +0.124 |
leaf | +0.121 |
fright | +0.118 |
Judy | +0.116 |
Neg logit contributions | |
fair | -0.111 |
aws | -0.109 |
nicely | -0.107 |
uddle | -0.103 |
favorite | -0.103 |
Token | br |
Token ID | 1671 |
Feature activation | -1.79e-01 |
Pos logit contributions | |
keeping | +0.086 |
leaf | +0.084 |
outage | +0.083 |
fright | +0.081 |
harm | +0.081 |
Neg logit contributions | |
nicely | -0.042 |
fair | -0.040 |
uddle | -0.039 |
favorite | -0.038 |
aws | -0.037 |
Token | as |
Token ID | 292 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
nicely | +0.274 |
aws | +0.267 |
fair | +0.264 |
uddle | +0.264 |
favorite | +0.254 |
Neg logit contributions | |
keeping | -0.316 |
leaf | -0.290 |
outage | -0.289 |
fright | -0.286 |
harm | -0.282 |
Token | . |
Token ID | 13 |
Feature activation | +5.64e-03 |
Pos logit contributions | |
keeping | +0.238 |
leaf | +0.228 |
outage | +0.225 |
Judy | +0.218 |
fright | +0.217 |
Neg logit contributions | |
nicely | -0.124 |
fair | -0.116 |
aws | -0.113 |
uddle | -0.107 |
cart | -0.104 |
Token |
|
Token ID | 198 |
Feature activation | -1.01e-02 |
Pos logit contributions | |
keeping | +0.012 |
outage | +0.011 |
leaf | +0.011 |
harm | +0.011 |
fright | +0.011 |
Neg logit contributions | |
nicely | -0.007 |
fair | -0.006 |
aws | -0.006 |
uddle | -0.006 |
favorite | -0.006 |
Token |
|
Token ID | 198 |
Feature activation | -9.63e-03 |
Pos logit contributions | |
nicely | +0.010 |
aws | +0.009 |
uddle | +0.009 |
fair | +0.008 |
favorite | +0.008 |
Neg logit contributions | |
keeping | -0.025 |
leaf | -0.024 |
fly | -0.023 |
outage | -0.023 |
harm | -0.022 |
Token | " |
Token ID | 1 |
Feature activation | -8.58e-02 |
Pos logit contributions | |
uddle | +0.011 |
nicely | +0.011 |
fair | +0.011 |
favorite | +0.010 |
aws | +0.010 |
Neg logit contributions | |
keeping | -0.020 |
outage | -0.019 |
leaf | -0.019 |
fright | -0.019 |
Judy | -0.019 |
Token | gir |
Token ID | 37370 |
Feature activation | -6.13e-03 |
Pos logit contributions | |
nicely | +0.202 |
fair | +0.201 |
aws | +0.192 |
uddle | +0.188 |
favorite | +0.187 |
Neg logit contributions | |
keeping | -0.285 |
outage | -0.265 |
leaf | -0.259 |
harm | -0.253 |
fright | -0.251 |
Token | aff |
Token ID | 2001 |
Feature activation | +3.93e-02 |
Pos logit contributions | |
nicely | +0.010 |
uddle | +0.009 |
fair | +0.009 |
favorite | +0.009 |
cooler | +0.009 |
Neg logit contributions | |
leaf | -0.010 |
keeping | -0.010 |
harm | -0.009 |
fly | -0.009 |
icated | -0.009 |
Token | es |
Token ID | 274 |
Feature activation | +1.94e-02 |
Pos logit contributions | |
keeping | +0.058 |
outage | +0.053 |
leaf | +0.052 |
fright | +0.051 |
harm | +0.050 |
Neg logit contributions | |
nicely | -0.071 |
fair | -0.070 |
aws | -0.069 |
uddle | -0.068 |
favorite | -0.066 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
keeping | +0.045 |
outage | +0.042 |
leaf | +0.042 |
fly | +0.042 |
fright | +0.042 |
Neg logit contributions | |
nicely | -0.018 |
uddle | -0.017 |
fair | -0.017 |
aws | -0.016 |
favorite | -0.016 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.33e-02 |
Pos logit contributions | |
uddle | +0.214 |
nicely | +0.210 |
fair | +0.199 |
aws | +0.197 |
favorite | +0.192 |
Neg logit contributions | |
keeping | -0.263 |
fly | -0.254 |
leaf | -0.248 |
fright | -0.248 |
Judy | -0.245 |
Token | br |
Token ID | 1671 |
Feature activation | -1.79e-01 |
Pos logit contributions | |
leaf | +0.087 |
keeping | +0.085 |
outage | +0.084 |
fly | +0.083 |
osh | +0.082 |
Neg logit contributions | |
nicely | -0.050 |
uddle | -0.048 |
favorite | -0.048 |
fair | -0.048 |
aws | -0.045 |
Token | as |
Token ID | 292 |
Feature activation | +1.13e-01 |
Pos logit contributions | |
nicely | +0.311 |
fair | +0.308 |
uddle | +0.302 |
cart | +0.292 |
favorite | +0.288 |
Neg logit contributions | |
keeping | -0.267 |
outage | -0.245 |
leaf | -0.244 |
fly | -0.243 |
Judy | -0.235 |
Token | , |
Token ID | 11 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
keeping | +0.276 |
outage | +0.262 |
leaf | +0.260 |
fly | +0.257 |
fright | +0.256 |
Neg logit contributions | |
nicely | -0.091 |
fair | -0.088 |
uddle | -0.087 |
aws | -0.083 |
favorite | -0.080 |
Token | and |
Token ID | 290 |
Feature activation | +4.18e-02 |
Pos logit contributions | |
uddle | +0.205 |
nicely | +0.201 |
aws | +0.188 |
fair | +0.187 |
favorite | +0.181 |
Neg logit contributions | |
keeping | -0.275 |
fly | -0.265 |
leaf | -0.259 |
fright | -0.259 |
outage | -0.255 |
Token | more |
Token ID | 517 |
Feature activation | -9.02e-02 |
Pos logit contributions | |
keeping | +0.078 |
outage | +0.073 |
leaf | +0.073 |
fright | +0.071 |
harm | +0.071 |
Neg logit contributions | |
nicely | -0.058 |
uddle | -0.056 |
fair | -0.056 |
aws | -0.055 |
favorite | -0.053 |
Token | . |
Token ID | 13 |
Feature activation | +5.53e-03 |
Pos logit contributions | |
nicely | +0.112 |
fair | +0.111 |
aws | +0.104 |
uddle | +0.103 |
favorite | +0.101 |
Neg logit contributions | |
keeping | -0.186 |
outage | -0.176 |
leaf | -0.174 |
fright | -0.169 |
Judy | -0.169 |
Token | . |
Token ID | 13 |
Feature activation | +9.99e-03 |
Pos logit contributions | |
nicely | +0.098 |
uddle | +0.097 |
fair | +0.096 |
aws | +0.096 |
favorite | +0.091 |
Neg logit contributions | |
keeping | -0.143 |
outage | -0.134 |
fly | -0.128 |
harm | -0.128 |
leaf | -0.128 |
Token | They |
Token ID | 1119 |
Feature activation | -3.25e-02 |
Pos logit contributions | |
keeping | +0.020 |
outage | +0.018 |
leaf | +0.018 |
fright | +0.018 |
Judy | +0.018 |
Neg logit contributions | |
nicely | -0.013 |
uddle | -0.013 |
fair | -0.013 |
aws | -0.012 |
favorite | -0.012 |
Token | use |
Token ID | 779 |
Feature activation | -9.55e-03 |
Pos logit contributions | |
nicely | +0.050 |
fair | +0.050 |
favorite | +0.047 |
uddle | +0.046 |
nice | +0.046 |
Neg logit contributions | |
keeping | -0.063 |
eway | -0.055 |
ull | -0.055 |
outage | -0.054 |
leaf | -0.053 |
Token | their |
Token ID | 511 |
Feature activation | -7.70e-02 |
Pos logit contributions | |
nicely | +0.011 |
fair | +0.011 |
favorite | +0.011 |
cart | +0.010 |
nice | +0.010 |
Neg logit contributions | |
keeping | -0.021 |
leaf | -0.019 |
outage | -0.019 |
fly | -0.019 |
Judy | -0.018 |
Token | tooth |
Token ID | 16162 |
Feature activation | -9.70e-03 |
Pos logit contributions | |
favorite | +0.107 |
fair | +0.105 |
nicely | +0.103 |
cart | +0.101 |
uddle | +0.101 |
Neg logit contributions | |
keeping | -0.153 |
outage | -0.141 |
leaf | -0.140 |
fly | -0.137 |
ull | -0.137 |
Token | br |
Token ID | 1671 |
Feature activation | -1.78e-01 |
Pos logit contributions | |
nicely | +0.014 |
fair | +0.013 |
uddle | +0.013 |
aws | +0.013 |
favorite | +0.012 |
Neg logit contributions | |
keeping | -0.018 |
outage | -0.017 |
leaf | -0.016 |
fly | -0.016 |
fright | -0.016 |
Token | ushes |
Token ID | 17237 |
Feature activation | +5.78e-02 |
Pos logit contributions | |
nicely | +0.273 |
fair | +0.267 |
uddle | +0.264 |
aws | +0.260 |
favorite | +0.254 |
Neg logit contributions | |
keeping | -0.309 |
outage | -0.286 |
leaf | -0.284 |
fright | -0.278 |
harm | -0.277 |
Token | and |
Token ID | 290 |
Feature activation | +4.00e-02 |
Pos logit contributions | |
keeping | +0.139 |
leaf | +0.133 |
outage | +0.132 |
fright | +0.129 |
harm | +0.128 |
Neg logit contributions | |
nicely | -0.056 |
fair | -0.052 |
aws | -0.047 |
favorite | -0.047 |
nice | -0.044 |
Token | tooth |
Token ID | 16162 |
Feature activation | -1.12e-02 |
Pos logit contributions | |
keeping | +0.075 |
outage | +0.070 |
eway | +0.068 |
leaf | +0.068 |
ull | +0.066 |
Neg logit contributions | |
fair | -0.060 |
nicely | -0.058 |
favorite | -0.056 |
nice | -0.054 |
cooler | -0.053 |
Token | paste |
Token ID | 34274 |
Feature activation | -3.18e-03 |
Pos logit contributions | |
nicely | +0.019 |
fair | +0.018 |
uddle | +0.018 |
aws | +0.018 |
favorite | +0.017 |
Neg logit contributions | |
keeping | -0.018 |
outage | -0.017 |
fly | -0.016 |
leaf | -0.016 |
harm | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | +8.35e-03 |
Pos logit contributions | |
uddle | +0.004 |
nicely | +0.004 |
fair | +0.004 |
aws | +0.004 |
favorite | +0.004 |
Neg logit contributions | |
keeping | -0.006 |
outage | -0.006 |
fly | -0.006 |
harm | -0.006 |
leaf | -0.006 |
Token | Where |
Token ID | 8496 |
Feature activation | +1.00e-02 |
Pos logit contributions | |
nicely | +0.142 |
fair | +0.139 |
uddle | +0.138 |
aws | +0.135 |
favorite | +0.133 |
Neg logit contributions | |
keeping | -0.139 |
outage | -0.128 |
leaf | -0.127 |
fright | -0.124 |
harm | -0.124 |
Token | are |
Token ID | 389 |
Feature activation | -3.96e-02 |
Pos logit contributions | |
keeping | +0.019 |
leaf | +0.017 |
outage | +0.017 |
harm | +0.016 |
fly | +0.016 |
Neg logit contributions | |
fair | -0.015 |
nicely | -0.015 |
uddle | -0.015 |
favorite | -0.014 |
aws | -0.014 |
Token | the |
Token ID | 262 |
Feature activation | -2.45e-02 |
Pos logit contributions | |
nicely | +0.073 |
uddle | +0.073 |
aws | +0.068 |
fair | +0.068 |
udd | +0.067 |
Neg logit contributions | |
keeping | -0.053 |
leaf | -0.049 |
Judy | -0.049 |
icated | -0.049 |
fright | -0.048 |
Token | baby |
Token ID | 5156 |
Feature activation | +7.40e-02 |
Pos logit contributions | |
fair | +0.032 |
aws | +0.029 |
favorite | +0.029 |
nicely | +0.029 |
uddle | +0.028 |
Neg logit contributions | |
keeping | -0.053 |
outage | -0.048 |
leaf | -0.047 |
fright | -0.046 |
Judy | -0.046 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.03e-02 |
Pos logit contributions | |
keeping | +0.155 |
leaf | +0.138 |
outage | +0.135 |
fright | +0.129 |
eway | +0.129 |
Neg logit contributions | |
fair | -0.105 |
favorite | -0.096 |
best | -0.095 |
nice | -0.093 |
nicely | -0.093 |
Token | br |
Token ID | 1671 |
Feature activation | -1.78e-01 |
Pos logit contributions | |
keeping | +0.100 |
outage | +0.093 |
leaf | +0.091 |
fright | +0.091 |
harm | +0.091 |
Neg logit contributions | |
nicely | -0.034 |
fair | -0.033 |
uddle | -0.032 |
aws | -0.031 |
favorite | -0.029 |
Token | as |
Token ID | 292 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
nicely | +0.267 |
aws | +0.264 |
uddle | +0.260 |
fair | +0.255 |
favorite | +0.249 |
Neg logit contributions | |
keeping | -0.325 |
fright | -0.294 |
leaf | -0.294 |
outage | -0.293 |
harm | -0.291 |
Token | ?" |
Token ID | 1701 |
Feature activation | +6.94e-02 |
Pos logit contributions | |
keeping | +0.208 |
leaf | +0.194 |
outage | +0.187 |
Judy | +0.179 |
fright | +0.179 |
Neg logit contributions | |
nicely | -0.162 |
fair | -0.161 |
uddle | -0.150 |
aws | -0.147 |
cart | -0.146 |
Token | Tom |
Token ID | 4186 |
Feature activation | +3.61e-02 |
Pos logit contributions | |
keeping | +0.148 |
outage | +0.138 |
leaf | +0.137 |
fly | +0.135 |
fright | +0.133 |
Neg logit contributions | |
nicely | -0.083 |
fair | -0.081 |
aws | -0.077 |
uddle | -0.076 |
favorite | -0.075 |
Token | asked |
Token ID | 1965 |
Feature activation | +1.07e-02 |
Pos logit contributions | |
keeping | +0.062 |
outage | +0.057 |
leaf | +0.057 |
fright | +0.055 |
harm | +0.055 |
Neg logit contributions | |
nicely | -0.056 |
fair | -0.055 |
uddle | -0.054 |
aws | -0.053 |
favorite | -0.052 |
Token | Mom |
Token ID | 11254 |
Feature activation | +7.64e-02 |
Pos logit contributions | |
keeping | +0.019 |
Judy | +0.018 |
fright | +0.018 |
harm | +0.018 |
leaf | +0.018 |
Neg logit contributions | |
uddle | -0.015 |
fair | -0.014 |
nicely | -0.014 |
aws | -0.014 |
sandbox | -0.014 |
Token | says |
Token ID | 1139 |
Feature activation | +2.93e-02 |
Pos logit contributions | |
fair | +0.006 |
nicely | +0.006 |
nice | +0.006 |
cooler | +0.006 |
favorite | +0.006 |
Neg logit contributions | |
keeping | -0.007 |
rum | -0.006 |
outage | -0.006 |
fright | -0.006 |
leaf | -0.006 |
Token | " |
Token ID | 366 |
Feature activation | -6.41e-02 |
Pos logit contributions | |
keeping | +0.074 |
leaf | +0.069 |
outage | +0.068 |
harm | +0.068 |
fright | +0.067 |
Neg logit contributions | |
nicely | -0.025 |
fair | -0.023 |
aws | -0.022 |
uddle | -0.021 |
favorite | -0.020 |
Token | Feed |
Token ID | 18332 |
Feature activation | -3.20e-02 |
Pos logit contributions | |
fair | +0.087 |
nicely | +0.086 |
aws | +0.085 |
nice | +0.076 |
hockey | +0.075 |
Neg logit contributions | |
keeping | -0.120 |
outage | -0.120 |
Maria | -0.116 |
Judy | -0.114 |
leaf | -0.111 |
Token | the |
Token ID | 262 |
Feature activation | -2.33e-02 |
Pos logit contributions | |
nicely | +0.047 |
fair | +0.046 |
aws | +0.044 |
uddle | +0.042 |
cart | +0.042 |
Neg logit contributions | |
keeping | -0.060 |
outage | -0.059 |
leaf | -0.056 |
fright | -0.056 |
harm | -0.054 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.71e-02 |
Pos logit contributions | |
fair | +0.030 |
nicely | +0.029 |
cart | +0.027 |
aws | +0.027 |
nice | +0.027 |
Neg logit contributions | |
keeping | -0.049 |
rum | -0.047 |
jo | -0.047 |
outage | -0.047 |
Maria | -0.046 |
Token | br |
Token ID | 1671 |
Feature activation | -1.78e-01 |
Pos logit contributions | |
keeping | +0.102 |
outage | +0.092 |
fright | +0.090 |
harm | +0.090 |
Judy | +0.089 |
Neg logit contributions | |
nicely | -0.057 |
fair | -0.056 |
aws | -0.054 |
uddle | -0.054 |
cart | -0.051 |
Token | as |
Token ID | 292 |
Feature activation | +1.13e-01 |
Pos logit contributions | |
nicely | +0.258 |
aws | +0.255 |
uddle | +0.242 |
favorite | +0.241 |
fair | +0.240 |
Neg logit contributions | |
keeping | -0.340 |
fright | -0.313 |
harm | -0.310 |
leaf | -0.307 |
outage | -0.307 |
Token | ". |
Token ID | 1911 |
Feature activation | +4.40e-02 |
Pos logit contributions | |
keeping | +0.200 |
leaf | +0.190 |
outage | +0.187 |
fright | +0.183 |
harm | +0.180 |
Neg logit contributions | |
nicely | -0.181 |
fair | -0.170 |
cart | -0.163 |
aws | -0.161 |
gym | -0.154 |
Token | He |
Token ID | 679 |
Feature activation | -4.78e-02 |
Pos logit contributions | |
keeping | +0.096 |
outage | +0.091 |
leaf | +0.090 |
fright | +0.088 |
harm | +0.088 |
Neg logit contributions | |
nicely | -0.048 |
fair | -0.046 |
uddle | -0.046 |
aws | -0.044 |
favorite | -0.043 |
Token | has |
Token ID | 468 |
Feature activation | +1.39e-02 |
Pos logit contributions | |
nicely | +0.084 |
fair | +0.083 |
uddle | +0.081 |
aws | +0.081 |
favorite | +0.079 |
Neg logit contributions | |
keeping | -0.074 |
outage | -0.067 |
leaf | -0.066 |
harm | -0.064 |
fright | -0.064 |
Token | some |
Token ID | 617 |
Feature activation | -2.56e-03 |
Pos logit contributions | |
keeping | +0.031 |
outage | +0.029 |
fly | +0.029 |
leaf | +0.029 |
icated | +0.028 |
Neg logit contributions | |
fair | -0.016 |
favorite | -0.016 |
nice | -0.015 |
nicely | -0.015 |
aws | -0.014 |
Token |
|
Token ID | 198 |
Feature activation | -1.14e-04 |
Pos logit contributions | |
keeping | +0.024 |
outage | +0.023 |
leaf | +0.022 |
harm | +0.022 |
fright | +0.022 |
Neg logit contributions | |
nicely | -0.013 |
fair | -0.013 |
uddle | -0.013 |
aws | -0.013 |
favorite | -0.012 |
Token |
|
Token ID | 198 |
Feature activation | -9.60e-04 |
Pos logit contributions | |
nicely | +0.000 |
aws | +0.000 |
fair | +0.000 |
uddle | +0.000 |
nice | +0.000 |
Neg logit contributions | |
keeping | -0.000 |
outage | -0.000 |
fly | -0.000 |
leaf | -0.000 |
harm | -0.000 |
Token | The |
Token ID | 464 |
Feature activation | +4.09e-02 |
Pos logit contributions | |
nicely | +0.001 |
fair | +0.001 |
uddle | +0.001 |
aws | +0.001 |
favorite | +0.001 |
Neg logit contributions | |
keeping | -0.002 |
outage | -0.002 |
leaf | -0.002 |
fright | -0.002 |
harm | -0.002 |
Token | baby |
Token ID | 5156 |
Feature activation | +7.73e-02 |
Pos logit contributions | |
keeping | +0.092 |
outage | +0.085 |
Judy | +0.084 |
rum | +0.083 |
leaf | +0.083 |
Neg logit contributions | |
fair | -0.048 |
nicely | -0.047 |
uddle | -0.043 |
favorite | -0.043 |
cart | -0.042 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.57e-02 |
Pos logit contributions | |
keeping | +0.154 |
outage | +0.141 |
leaf | +0.139 |
eway | +0.137 |
fright | +0.134 |
Neg logit contributions | |
fair | -0.109 |
nicely | -0.108 |
uddle | -0.102 |
favorite | -0.102 |
aws | -0.101 |
Token | br |
Token ID | 1671 |
Feature activation | -1.78e-01 |
Pos logit contributions | |
keeping | +0.105 |
outage | +0.099 |
leaf | +0.099 |
fright | +0.097 |
harm | +0.097 |
Neg logit contributions | |
nicely | -0.044 |
fair | -0.042 |
uddle | -0.042 |
aws | -0.040 |
favorite | -0.039 |
Token | as |
Token ID | 292 |
Feature activation | +1.13e-01 |
Pos logit contributions | |
nicely | +0.270 |
aws | +0.263 |
fair | +0.260 |
uddle | +0.260 |
favorite | +0.251 |
Neg logit contributions | |
keeping | -0.319 |
outage | -0.290 |
fright | -0.289 |
leaf | -0.289 |
harm | -0.287 |
Token | looked |
Token ID | 3114 |
Feature activation | -9.32e-02 |
Pos logit contributions | |
keeping | +0.231 |
outage | +0.217 |
leaf | +0.213 |
fright | +0.207 |
harm | +0.207 |
Neg logit contributions | |
nicely | -0.145 |
fair | -0.140 |
aws | -0.135 |
uddle | -0.131 |
cart | -0.126 |
Token | at |
Token ID | 379 |
Feature activation | +1.85e-02 |
Pos logit contributions | |
uddle | +0.079 |
nicely | +0.076 |
aws | +0.071 |
fair | +0.070 |
cart | +0.067 |
Neg logit contributions | |
keeping | -0.213 |
leaf | -0.208 |
fright | -0.207 |
harm | -0.204 |
fly | -0.204 |
Token | them |
Token ID | 606 |
Feature activation | -5.40e-02 |
Pos logit contributions | |
keeping | +0.042 |
outage | +0.042 |
leaf | +0.041 |
fright | +0.039 |
fly | +0.039 |
Neg logit contributions | |
nicely | -0.019 |
fair | -0.019 |
aws | -0.018 |
favorite | -0.017 |
uddle | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
uddle | +0.069 |
nicely | +0.068 |
fair | +0.068 |
aws | +0.066 |
favorite | +0.063 |
Neg logit contributions | |
keeping | -0.103 |
outage | -0.096 |
fly | -0.096 |
leaf | -0.096 |
fright | -0.095 |
Token | Lily |
Token ID | 20037 |
Feature activation | +3.44e-02 |
Pos logit contributions | |
keeping | +0.107 |
outage | +0.103 |
leaf | +0.101 |
eway | +0.097 |
harm | +0.096 |
Neg logit contributions | |
fair | -0.040 |
nicely | -0.040 |
favorite | -0.034 |
nice | -0.033 |
cart | -0.032 |
Token | looked |
Token ID | 3114 |
Feature activation | -9.42e-02 |
Pos logit contributions | |
keeping | +0.054 |
leaf | +0.050 |
fright | +0.050 |
Judy | +0.049 |
fly | +0.048 |
Neg logit contributions | |
nicely | -0.055 |
uddle | -0.054 |
fair | -0.054 |
aws | -0.053 |
favorite | -0.053 |
Token | at |
Token ID | 379 |
Feature activation | +1.80e-02 |
Pos logit contributions | |
Days | +0.099 |
aws | +0.093 |
gym | +0.093 |
sandbox | +0.089 |
uddle | +0.089 |
Neg logit contributions | |
fright | -0.183 |
leaf | -0.181 |
Maria | -0.178 |
Judy | -0.176 |
harm | -0.173 |
Token | the |
Token ID | 262 |
Feature activation | -2.42e-02 |
Pos logit contributions | |
keeping | +0.045 |
outage | +0.044 |
leaf | +0.042 |
've | +0.041 |
rum | +0.041 |
Neg logit contributions | |
nicely | -0.018 |
fair | -0.017 |
favorite | -0.017 |
aws | -0.016 |
cart | -0.014 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.39e-02 |
Pos logit contributions | |
fair | +0.030 |
uddle | +0.028 |
aws | +0.028 |
favorite | +0.028 |
nicely | +0.027 |
Neg logit contributions | |
keeping | -0.054 |
outage | -0.050 |
rum | -0.048 |
leaf | -0.048 |
Judy | -0.048 |
Token | br |
Token ID | 1671 |
Feature activation | -1.77e-01 |
Pos logit contributions | |
keeping | +0.095 |
leaf | +0.091 |
outage | +0.091 |
fly | +0.088 |
harm | +0.088 |
Neg logit contributions | |
nicely | -0.047 |
fair | -0.045 |
uddle | -0.044 |
aws | -0.043 |
favorite | -0.043 |
Token | as |
Token ID | 292 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
nicely | +0.276 |
aws | +0.267 |
uddle | +0.266 |
fair | +0.265 |
favorite | +0.256 |
Neg logit contributions | |
keeping | -0.311 |
leaf | -0.285 |
outage | -0.284 |
fright | -0.283 |
harm | -0.279 |
Token | through |
Token ID | 832 |
Feature activation | +2.04e-02 |
Pos logit contributions | |
keeping | +0.233 |
outage | +0.218 |
leaf | +0.216 |
fright | +0.212 |
fly | +0.212 |
Neg logit contributions | |
nicely | -0.137 |
fair | -0.134 |
uddle | -0.133 |
aws | -0.129 |
favorite | -0.125 |
Token | the |
Token ID | 262 |
Feature activation | -2.46e-02 |
Pos logit contributions | |
keeping | +0.039 |
outage | +0.038 |
leaf | +0.038 |
icated | +0.035 |
fly | +0.035 |
Neg logit contributions | |
nicely | -0.028 |
fair | -0.028 |
aws | -0.027 |
favorite | -0.025 |
uddle | -0.025 |
Token | fence |
Token ID | 13990 |
Feature activation | +4.46e-02 |
Pos logit contributions | |
fair | +0.030 |
uddle | +0.030 |
nicely | +0.030 |
aws | +0.028 |
cart | +0.028 |
Neg logit contributions | |
keeping | -0.051 |
outage | -0.049 |
leaf | -0.048 |
ull | -0.046 |
Judy | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | +5.77e-03 |
Pos logit contributions | |
keeping | +0.084 |
outage | +0.078 |
fly | +0.077 |
harm | +0.077 |
fright | +0.077 |
Neg logit contributions | |
uddle | -0.059 |
nicely | -0.058 |
fair | -0.058 |
aws | -0.056 |
favorite | -0.056 |
Token | excited |
Token ID | 6568 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
nicely | +0.060 |
fair | +0.060 |
uddle | +0.057 |
favorite | +0.057 |
aws | +0.056 |
Neg logit contributions | |
keeping | -0.081 |
outage | -0.077 |
harm | -0.074 |
leaf | -0.074 |
fly | -0.072 |
Token | ly |
Token ID | 306 |
Feature activation | -2.22e-02 |
Pos logit contributions | |
keeping | +0.032 |
outage | +0.030 |
leaf | +0.030 |
fright | +0.029 |
harm | +0.029 |
Neg logit contributions | |
nicely | -0.019 |
fair | -0.018 |
uddle | -0.018 |
aws | -0.018 |
favorite | -0.017 |
Token | grabbed |
Token ID | 13176 |
Feature activation | -3.46e-02 |
Pos logit contributions | |
nicely | +0.035 |
fair | +0.034 |
uddle | +0.034 |
aws | +0.033 |
favorite | +0.032 |
Neg logit contributions | |
keeping | -0.038 |
outage | -0.035 |
leaf | -0.035 |
fright | -0.034 |
harm | -0.034 |
Token | her |
Token ID | 607 |
Feature activation | -5.02e-02 |
Pos logit contributions | |
nicely | +0.050 |
fair | +0.049 |
favorite | +0.046 |
aws | +0.045 |
cart | +0.045 |
Neg logit contributions | |
keeping | -0.067 |
outage | -0.066 |
fly | -0.062 |
jo | -0.060 |
leaf | -0.060 |
Token | paint |
Token ID | 7521 |
Feature activation | -1.90e-02 |
Pos logit contributions | |
favorite | +0.061 |
uddle | +0.057 |
nicely | +0.057 |
cart | +0.056 |
fair | +0.056 |
Neg logit contributions | |
keeping | -0.118 |
outage | -0.110 |
icated | -0.108 |
leaf | -0.107 |
rum | -0.106 |
Token | br |
Token ID | 1671 |
Feature activation | -1.77e-01 |
Pos logit contributions | |
nicely | +0.022 |
fair | +0.022 |
uddle | +0.021 |
aws | +0.021 |
favorite | +0.020 |
Neg logit contributions | |
keeping | -0.040 |
outage | -0.037 |
leaf | -0.037 |
fright | -0.036 |
fly | -0.036 |
Token | ushes |
Token ID | 17237 |
Feature activation | +5.41e-02 |
Pos logit contributions | |
nicely | +0.261 |
uddle | +0.255 |
fair | +0.253 |
aws | +0.252 |
favorite | +0.243 |
Neg logit contributions | |
keeping | -0.320 |
leaf | -0.296 |
outage | -0.295 |
fright | -0.290 |
harm | -0.290 |
Token | and |
Token ID | 290 |
Feature activation | +3.83e-02 |
Pos logit contributions | |
keeping | +0.122 |
outage | +0.117 |
leaf | +0.116 |
fright | +0.113 |
harm | +0.113 |
Neg logit contributions | |
nicely | -0.055 |
fair | -0.053 |
aws | -0.050 |
uddle | -0.049 |
favorite | -0.048 |
Token | set |
Token ID | 900 |
Feature activation | -4.53e-02 |
Pos logit contributions | |
keeping | +0.074 |
outage | +0.073 |
fly | +0.070 |
harm | +0.069 |
leaf | +0.069 |
Neg logit contributions | |
fair | -0.051 |
nicely | -0.051 |
favorite | -0.048 |
uddle | -0.048 |
aws | -0.047 |
Token | to |
Token ID | 284 |
Feature activation | +8.32e-02 |
Pos logit contributions | |
nicely | +0.074 |
fair | +0.072 |
uddle | +0.070 |
aws | +0.069 |
favorite | +0.068 |
Neg logit contributions | |
keeping | -0.075 |
outage | -0.070 |
leaf | -0.069 |
fly | -0.067 |
fright | -0.067 |
Token | work |
Token ID | 670 |
Feature activation | -4.14e-02 |
Pos logit contributions | |
keeping | +0.165 |
outage | +0.157 |
fly | +0.155 |
jo | +0.151 |
leaf | +0.148 |
Neg logit contributions | |
fair | -0.118 |
nicely | -0.117 |
nice | -0.105 |
favorite | -0.105 |
cart | -0.101 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
keeping | +0.075 |
fly | +0.072 |
outage | +0.072 |
leaf | +0.071 |
harm | +0.070 |
Neg logit contributions | |
uddle | -0.039 |
nicely | -0.039 |
fair | -0.037 |
aws | -0.036 |
favorite | -0.036 |
Token | like |
Token ID | 588 |
Feature activation | +2.77e-02 |
Pos logit contributions | |
fair | +0.159 |
nicely | +0.159 |
aws | +0.150 |
favorite | +0.148 |
uddle | +0.142 |
Neg logit contributions | |
keeping | -0.326 |
outage | -0.307 |
leaf | -0.301 |
harm | -0.296 |
fly | -0.295 |
Token | lions |
Token ID | 31420 |
Feature activation | +1.06e-02 |
Pos logit contributions | |
keeping | +0.052 |
outage | +0.048 |
leaf | +0.048 |
fright | +0.047 |
harm | +0.047 |
Neg logit contributions | |
nicely | -0.039 |
uddle | -0.038 |
fair | -0.037 |
aws | -0.036 |
favorite | -0.035 |
Token | , |
Token ID | 11 |
Feature activation | -1.47e-01 |
Pos logit contributions | |
fly | +0.022 |
e | +0.022 |
keeping | +0.022 |
harm | +0.021 |
Lou | +0.020 |
Neg logit contributions | |
uddle | -0.011 |
nicely | -0.011 |
favorite | -0.010 |
fair | -0.010 |
Days | -0.010 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.37e-02 |
Pos logit contributions | |
nicely | +0.231 |
uddle | +0.228 |
fair | +0.218 |
aws | +0.212 |
favorite | +0.207 |
Neg logit contributions | |
keeping | -0.244 |
fly | -0.231 |
leaf | -0.229 |
fright | -0.228 |
outage | -0.227 |
Token | br |
Token ID | 1671 |
Feature activation | -1.77e-01 |
Pos logit contributions | |
leaf | +0.088 |
keeping | +0.087 |
outage | +0.086 |
fright | +0.084 |
fly | +0.084 |
Neg logit contributions | |
nicely | -0.051 |
fair | -0.048 |
uddle | -0.047 |
favorite | -0.047 |
aws | -0.044 |
Token | as |
Token ID | 292 |
Feature activation | +1.12e-01 |
Pos logit contributions | |
nicely | +0.318 |
fair | +0.314 |
uddle | +0.307 |
cart | +0.298 |
cooler | +0.294 |
Neg logit contributions | |
keeping | -0.251 |
fly | -0.232 |
outage | -0.231 |
leaf | -0.228 |
jo | -0.224 |
Token | , |
Token ID | 11 |
Feature activation | -1.47e-01 |
Pos logit contributions | |
keeping | +0.258 |
fly | +0.245 |
outage | +0.243 |
leaf | +0.238 |
harm | +0.236 |
Neg logit contributions | |
uddle | -0.102 |
nicely | -0.102 |
fair | -0.101 |
favorite | -0.095 |
aws | -0.092 |
Token | and |
Token ID | 290 |
Feature activation | +4.29e-02 |
Pos logit contributions | |
nicely | +0.201 |
uddle | +0.200 |
fair | +0.186 |
aws | +0.182 |
favorite | +0.177 |
Neg logit contributions | |
keeping | -0.277 |
fly | -0.263 |
leaf | -0.259 |
fright | -0.258 |
outage | -0.256 |
Token | gir |
Token ID | 37370 |
Feature activation | -6.96e-03 |
Pos logit contributions | |
keeping | +0.079 |
outage | +0.073 |
leaf | +0.073 |
fright | +0.071 |
harm | +0.071 |
Neg logit contributions | |
nicely | -0.061 |
fair | -0.060 |
uddle | -0.059 |
aws | -0.058 |
favorite | -0.056 |
Token | aff |
Token ID | 2001 |
Feature activation | +4.04e-02 |
Pos logit contributions | |
nicely | +0.011 |
fair | +0.010 |
uddle | +0.010 |
favorite | +0.010 |
cart | +0.009 |
Neg logit contributions | |
keeping | -0.012 |
leaf | -0.011 |
harm | -0.011 |
fly | -0.011 |
jo | -0.011 |
Token | pond |
Token ID | 16723 |
Feature activation | -7.65e-02 |
Pos logit contributions | |
nicely | +0.025 |
fair | +0.025 |
uddle | +0.024 |
aws | +0.023 |
favorite | +0.022 |
Neg logit contributions | |
keeping | -0.050 |
outage | -0.047 |
leaf | -0.047 |
Judy | -0.046 |
fly | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | +4.51e-03 |
Pos logit contributions | |
nicely | +0.085 |
fair | +0.082 |
uddle | +0.077 |
aws | +0.074 |
cart | +0.074 |
Neg logit contributions | |
leaf | -0.165 |
keeping | -0.165 |
outage | -0.160 |
icated | -0.155 |
fright | -0.154 |
Token | The |
Token ID | 383 |
Feature activation | +2.94e-02 |
Pos logit contributions | |
keeping | +0.010 |
outage | +0.010 |
harm | +0.010 |
leaf | +0.010 |
fright | +0.009 |
Neg logit contributions | |
nicely | -0.005 |
aws | -0.004 |
fair | -0.004 |
uddle | -0.004 |
cart | -0.004 |
Token | two |
Token ID | 734 |
Feature activation | +1.42e-02 |
Pos logit contributions | |
keeping | +0.065 |
outage | +0.061 |
leaf | +0.059 |
Judy | +0.058 |
fright | +0.058 |
Neg logit contributions | |
fair | -0.034 |
nicely | -0.033 |
uddle | -0.032 |
aws | -0.032 |
favorite | -0.031 |
Token | ze |
Token ID | 41271 |
Feature activation | +4.23e-02 |
Pos logit contributions | |
keeping | +0.021 |
harm | +0.021 |
fright | +0.021 |
leaf | +0.021 |
Judy | +0.020 |
Neg logit contributions | |
nicely | -0.023 |
uddle | -0.022 |
fair | -0.022 |
favorite | -0.021 |
aws | -0.021 |
Token | br |
Token ID | 1671 |
Feature activation | -1.77e-01 |
Pos logit contributions | |
keeping | +0.071 |
leaf | +0.068 |
outage | +0.067 |
fright | +0.066 |
harm | +0.065 |
Neg logit contributions | |
nicely | -0.065 |
fair | -0.063 |
uddle | -0.063 |
favorite | -0.062 |
aws | -0.061 |
Token | as |
Token ID | 292 |
Feature activation | +1.13e-01 |
Pos logit contributions | |
aws | +0.234 |
nicely | +0.232 |
uddle | +0.230 |
fair | +0.220 |
udd | +0.213 |
Neg logit contributions | |
keeping | -0.351 |
harm | -0.322 |
fright | -0.322 |
outage | -0.318 |
Fairy | -0.318 |
Token | stayed |
Token ID | 9658 |
Feature activation | -8.43e-02 |
Pos logit contributions | |
keeping | +0.225 |
outage | +0.212 |
leaf | +0.209 |
icated | +0.202 |
harm | +0.199 |
Neg logit contributions | |
nicely | -0.152 |
fair | -0.142 |
uddle | -0.138 |
cart | -0.135 |
aws | -0.134 |
Token | together |
Token ID | 1978 |
Feature activation | -3.25e-02 |
Pos logit contributions | |
nicely | +0.097 |
uddle | +0.096 |
aws | +0.094 |
fair | +0.092 |
favorite | +0.087 |
Neg logit contributions | |
keeping | -0.173 |
fright | -0.163 |
outage | -0.160 |
harm | -0.159 |
fly | -0.159 |
Token | for |
Token ID | 329 |
Feature activation | +5.38e-02 |
Pos logit contributions | |
nicely | +0.033 |
fair | +0.032 |
uddle | +0.031 |
aws | +0.030 |
favorite | +0.029 |
Neg logit contributions | |
keeping | -0.074 |
outage | -0.070 |
leaf | -0.070 |
fright | -0.068 |
harm | -0.068 |
Token | a |
Token ID | 257 |
Feature activation | +3.43e-02 |
Pos logit contributions | |
outage | +0.112 |
keeping | +0.110 |
leaf | +0.106 |
fly | +0.104 |
rated | +0.104 |
Neg logit contributions | |
fair | -0.068 |
nicely | -0.068 |
hockey | -0.060 |
nice | -0.060 |
aws | -0.059 |
Token | closed |
Token ID | 4838 |
Feature activation | -2.40e-02 |
Pos logit contributions | |
nicely | +0.120 |
uddle | +0.119 |
aws | +0.118 |
fair | +0.113 |
cart | +0.109 |
Neg logit contributions | |
keeping | -0.189 |
harm | -0.179 |
fright | -0.179 |
outage | -0.178 |
fly | -0.176 |
Token | and |
Token ID | 290 |
Feature activation | +3.68e-02 |
Pos logit contributions | |
nicely | +0.029 |
uddle | +0.029 |
fair | +0.028 |
aws | +0.028 |
gym | +0.026 |
Neg logit contributions | |
keeping | -0.049 |
outage | -0.045 |
fright | -0.045 |
harm | -0.044 |
fly | -0.044 |
Token | the |
Token ID | 262 |
Feature activation | -2.17e-02 |
Pos logit contributions | |
keeping | +0.072 |
leaf | +0.071 |
outage | +0.070 |
rum | +0.065 |
ull | +0.065 |
Neg logit contributions | |
fair | -0.054 |
nicely | -0.054 |
favorite | -0.048 |
nice | -0.047 |
cooler | -0.045 |
Token | bird |
Token ID | 6512 |
Feature activation | +5.31e-02 |
Pos logit contributions | |
fair | +0.029 |
nicely | +0.027 |
uddle | +0.027 |
cart | +0.027 |
favorite | +0.027 |
Neg logit contributions | |
keeping | -0.045 |
leaf | -0.042 |
outage | -0.042 |
eway | -0.041 |
rum | -0.041 |
Token | ie |
Token ID | 494 |
Feature activation | +3.96e-02 |
Pos logit contributions | |
keeping | +0.096 |
leaf | +0.095 |
outage | +0.087 |
fright | +0.082 |
icated | +0.082 |
Neg logit contributions | |
nicely | -0.084 |
fair | -0.082 |
favorite | -0.081 |
uddle | -0.079 |
nice | -0.076 |
Token | couldn |
Token ID | 3521 |
Feature activation | +1.18e-01 |
Pos logit contributions | |
keeping | +0.074 |
leaf | +0.072 |
outage | +0.070 |
icated | +0.067 |
eway | +0.066 |
Neg logit contributions | |
nicely | -0.058 |
fair | -0.057 |
uddle | -0.057 |
favorite | -0.056 |
aws | -0.054 |
Token | 't |
Token ID | 470 |
Feature activation | +1.22e-01 |
Pos logit contributions | |
keeping | +0.192 |
leaf | +0.176 |
outage | +0.170 |
fly | +0.164 |
Judy | +0.164 |
Neg logit contributions | |
nicely | -0.202 |
fair | -0.195 |
uddle | -0.194 |
aws | -0.190 |
gym | -0.185 |
Token | fly |
Token ID | 6129 |
Feature activation | -7.39e-02 |
Pos logit contributions | |
keeping | +0.231 |
leaf | +0.211 |
outage | +0.204 |
Judy | +0.202 |
icated | +0.198 |
Neg logit contributions | |
nicely | -0.184 |
fair | -0.177 |
uddle | -0.163 |
favorite | -0.163 |
nice | -0.159 |
Token | away |
Token ID | 1497 |
Feature activation | -1.20e-02 |
Pos logit contributions | |
uddle | +0.091 |
nicely | +0.089 |
fair | +0.087 |
aws | +0.086 |
cart | +0.083 |
Neg logit contributions | |
keeping | -0.148 |
fright | -0.139 |
harm | -0.139 |
outage | -0.138 |
fly | -0.136 |
Token | . |
Token ID | 13 |
Feature activation | +1.10e-02 |
Pos logit contributions | |
nicely | +0.015 |
fair | +0.014 |
uddle | +0.014 |
aws | +0.014 |
favorite | +0.013 |
Neg logit contributions | |
keeping | -0.024 |
leaf | -0.023 |
outage | -0.023 |
fright | -0.022 |
harm | -0.022 |
Token |
|
Token ID | 198 |
Feature activation | -9.30e-03 |
Pos logit contributions | |
keeping | +0.021 |
outage | +0.020 |
leaf | +0.020 |
fright | +0.019 |
Judy | +0.019 |
Neg logit contributions | |
nicely | -0.015 |
fair | -0.014 |
uddle | -0.014 |
aws | -0.014 |
favorite | -0.013 |
Token | � |
Token ID | 6184 |
Feature activation | +2.34e-02 |
Pos logit contributions | |
uddle | +0.129 |
aws | +0.122 |
nicely | +0.120 |
fair | +0.119 |
favorite | +0.110 |
Neg logit contributions | |
keeping | -0.362 |
leaf | -0.349 |
outage | -0.344 |
Judy | -0.343 |
fright | -0.342 |
Token | � |
Token ID | 95 |
Feature activation | +2.83e-03 |
Pos logit contributions | |
leaf | +0.035 |
grant | +0.034 |
outage | +0.033 |
keeping | +0.033 |
Fairy | +0.033 |
Neg logit contributions | |
aws | -0.038 |
fair | -0.038 |
favorite | -0.038 |
sandbox | -0.037 |
best | -0.036 |
Token | € |
Token ID | 26391 |
Feature activation | +1.08e-02 |
Pos logit contributions | |
keeping | +0.004 |
outage | +0.004 |
harm | +0.004 |
leaf | +0.004 |
fright | +0.004 |
Neg logit contributions | |
nicely | -0.005 |
fair | -0.005 |
uddle | -0.005 |
aws | -0.005 |
favorite | -0.004 |
Token | � |
Token ID | 129 |
Feature activation | +2.86e-02 |
Pos logit contributions | |
keeping | +0.022 |
leaf | +0.021 |
harm | +0.021 |
outage | +0.021 |
fright | +0.020 |
Neg logit contributions | |
uddle | -0.013 |
nicely | -0.013 |
fair | -0.013 |
aws | -0.013 |
cart | -0.012 |
Token | � |
Token ID | 241 |
Feature activation | -6.70e-02 |
Pos logit contributions | |
keeping | +0.051 |
leaf | +0.048 |
outage | +0.047 |
fright | +0.046 |
harm | +0.046 |
Neg logit contributions | |
nicely | -0.042 |
uddle | -0.041 |
fair | -0.041 |
aws | -0.039 |
favorite | -0.039 |
Token | Oh |
Token ID | 5812 |
Feature activation | +1.61e-01 |
Pos logit contributions | |
fair | +0.084 |
aws | +0.083 |
nicely | +0.082 |
favorite | +0.080 |
hockey | +0.080 |
Neg logit contributions | |
fly | -0.130 |
keeping | -0.130 |
harm | -0.129 |
outage | -0.125 |
tal | -0.123 |
Token | , |
Token ID | 11 |
Feature activation | -1.56e-01 |
Pos logit contributions | |
keeping | +0.327 |
outage | +0.298 |
harm | +0.297 |
leaf | +0.294 |
fly | +0.287 |
Neg logit contributions | |
fair | -0.221 |
nicely | -0.215 |
aws | -0.202 |
uddle | -0.200 |
favorite | -0.199 |
Token | I |
Token ID | 314 |
Feature activation | +3.43e-02 |
Pos logit contributions | |
fair | +0.217 |
nicely | +0.216 |
nice | +0.196 |
favorite | +0.190 |
cooler | +0.187 |
Neg logit contributions | |
keeping | -0.319 |
outage | -0.293 |
harm | -0.288 |
fly | -0.285 |
leaf | -0.282 |
Token | â |
Token ID | 22940 |
Feature activation | +7.42e-02 |
Pos logit contributions | |
keeping | +0.065 |
icated | +0.055 |
outage | +0.054 |
harm | +0.054 |
fly | +0.053 |
Neg logit contributions | |
nicely | -0.055 |
fair | -0.054 |
nice | -0.052 |
favorite | -0.051 |
uddle | -0.051 |
Token | € |
Token ID | 26391 |
Feature activation | +1.05e-02 |
Pos logit contributions | |
keeping | +0.110 |
outage | +0.105 |
fly | +0.103 |
harm | +0.101 |
fright | +0.101 |
Neg logit contributions | |
nicely | -0.132 |
fair | -0.129 |
gym | -0.118 |
uddle | -0.118 |
aws | -0.117 |
Token | â„¢ |
Token ID | 8151 |
Feature activation | +1.95e-02 |
Pos logit contributions | |
keeping | +0.019 |
leaf | +0.018 |
harm | +0.018 |
outage | +0.018 |
fright | +0.017 |
Neg logit contributions | |
uddle | -0.015 |
nicely | -0.015 |
aws | -0.015 |
fair | -0.014 |
cart | -0.014 |
Token | Winston |
Token ID | 21400 |
Feature activation | +5.75e-02 |
Pos logit contributions | |
keeping | +0.020 |
outage | +0.018 |
leaf | +0.018 |
fright | +0.018 |
harm | +0.018 |
Neg logit contributions | |
nicely | -0.014 |
fair | -0.014 |
uddle | -0.014 |
aws | -0.013 |
favorite | -0.013 |
Token | wanted |
Token ID | 2227 |
Feature activation | +3.89e-02 |
Pos logit contributions | |
keeping | +0.099 |
outage | +0.092 |
leaf | +0.091 |
fright | +0.089 |
harm | +0.088 |
Neg logit contributions | |
nicely | -0.089 |
fair | -0.087 |
uddle | -0.086 |
aws | -0.084 |
favorite | -0.082 |
Token | to |
Token ID | 284 |
Feature activation | +8.85e-02 |
Pos logit contributions | |
keeping | +0.086 |
outage | +0.080 |
leaf | +0.079 |
fly | +0.079 |
fright | +0.077 |
Neg logit contributions | |
nicely | -0.043 |
fair | -0.042 |
aws | -0.039 |
uddle | -0.039 |
favorite | -0.038 |
Token | keep |
Token ID | 1394 |
Feature activation | -1.58e-03 |
Pos logit contributions | |
keeping | +0.179 |
outage | +0.168 |
icated | +0.167 |
leaf | +0.164 |
jo | +0.164 |
Neg logit contributions | |
fair | -0.121 |
nicely | -0.121 |
nice | -0.113 |
p | -0.104 |
gym | -0.103 |
Token | Jo |
Token ID | 5302 |
Feature activation | +7.43e-02 |
Pos logit contributions | |
fair | +0.002 |
nicely | +0.002 |
favorite | +0.002 |
nice | +0.002 |
hockey | +0.002 |
Neg logit contributions | |
keeping | -0.003 |
outage | -0.003 |
leaf | -0.003 |
fly | -0.003 |
ull | -0.003 |
Token | jo |
Token ID | 7639 |
Feature activation | +1.41e-01 |
Pos logit contributions | |
keeping | +0.092 |
Warning | +0.077 |
grant | +0.077 |
growth | +0.071 |
leaf | +0.070 |
Neg logit contributions | |
favorite | -0.149 |
udd | -0.147 |
nice | -0.147 |
nicely | -0.145 |
fair | -0.145 |
Token | safe |
Token ID | 3338 |
Feature activation | -3.23e-02 |
Pos logit contributions | |
keeping | +0.326 |
leaf | +0.311 |
outage | +0.310 |
fly | +0.301 |
Judy | +0.296 |
Neg logit contributions | |
fair | -0.150 |
nicely | -0.149 |
favorite | -0.138 |
nice | -0.133 |
uddle | -0.131 |
Token | , |
Token ID | 11 |
Feature activation | -1.51e-01 |
Pos logit contributions | |
nicely | +0.032 |
fair | +0.031 |
uddle | +0.030 |
aws | +0.029 |
favorite | +0.028 |
Neg logit contributions | |
keeping | -0.074 |
outage | -0.069 |
leaf | -0.069 |
fright | -0.068 |
harm | -0.068 |
Token | so |
Token ID | 523 |
Feature activation | +4.67e-02 |
Pos logit contributions | |
fair | +0.125 |
nicely | +0.116 |
favorite | +0.108 |
nice | +0.102 |
cart | +0.102 |
Neg logit contributions | |
keeping | -0.384 |
leaf | -0.364 |
fly | -0.363 |
outage | -0.358 |
ull | -0.357 |
Token | he |
Token ID | 339 |
Feature activation | -1.13e-01 |
Pos logit contributions | |
keeping | +0.112 |
leaf | +0.106 |
outage | +0.105 |
fly | +0.103 |
fright | +0.103 |
Neg logit contributions | |
nicely | -0.042 |
fair | -0.041 |
uddle | -0.039 |
aws | -0.038 |
favorite | -0.037 |
Token | chased |
Token ID | 26172 |
Feature activation | -3.81e-03 |
Pos logit contributions | |
nicely | +0.169 |
fair | +0.167 |
uddle | +0.162 |
aws | +0.159 |
favorite | +0.156 |
Neg logit contributions | |
keeping | -0.205 |
outage | -0.189 |
leaf | -0.186 |
harm | -0.181 |
fright | -0.180 |
Token | to |
Token ID | 284 |
Feature activation | +8.65e-02 |
Pos logit contributions | |
uddle | +0.109 |
nicely | +0.107 |
aws | +0.106 |
fair | +0.100 |
sandbox | +0.099 |
Neg logit contributions | |
keeping | -0.132 |
leaf | -0.124 |
outage | -0.120 |
fly | -0.119 |
harm | -0.117 |
Token | a |
Token ID | 257 |
Feature activation | +3.66e-02 |
Pos logit contributions | |
keeping | +0.161 |
outage | +0.150 |
leaf | +0.148 |
fly | +0.146 |
harm | +0.143 |
Neg logit contributions | |
fair | -0.130 |
nicely | -0.127 |
favorite | -0.118 |
nice | -0.116 |
aws | -0.114 |
Token | party |
Token ID | 2151 |
Feature activation | -6.95e-02 |
Pos logit contributions | |
leaf | +0.067 |
outage | +0.067 |
fright | +0.067 |
icated | +0.066 |
Fairy | +0.066 |
Neg logit contributions | |
aws | -0.045 |
uddle | -0.039 |
nicely | -0.038 |
udd | -0.038 |
cart | -0.035 |
Token | at |
Token ID | 379 |
Feature activation | +1.92e-02 |
Pos logit contributions | |
uddle | +0.083 |
aws | +0.078 |
nicely | +0.078 |
fair | +0.073 |
cart | +0.070 |
Neg logit contributions | |
keeping | -0.143 |
outage | -0.133 |
Maria | -0.131 |
leaf | -0.131 |
Judy | -0.129 |
Token | his |
Token ID | 465 |
Feature activation | -6.15e-02 |
Pos logit contributions | |
keeping | +0.032 |
outage | +0.028 |
leaf | +0.028 |
harm | +0.028 |
fright | +0.027 |
Neg logit contributions | |
uddle | -0.032 |
nicely | -0.031 |
aws | -0.030 |
fair | -0.030 |
favorite | -0.029 |
Token | friend |
Token ID | 1545 |
Feature activation | +1.20e-01 |
Pos logit contributions | |
nicely | +0.074 |
aws | +0.070 |
uddle | +0.068 |
cart | +0.065 |
udd | +0.065 |
Neg logit contributions | |
keeping | -0.106 |
Maria | -0.103 |
harm | -0.102 |
fright | -0.102 |
outage | -0.100 |
Token | 's |
Token ID | 338 |
Feature activation | -5.28e-02 |
Pos logit contributions | |
keeping | +0.228 |
Judy | +0.214 |
Maria | +0.214 |
outage | +0.211 |
leaf | +0.209 |
Neg logit contributions | |
uddle | -0.155 |
nicely | -0.154 |
fair | -0.148 |
aws | -0.146 |
favorite | -0.139 |
Token | house |
Token ID | 2156 |
Feature activation | -2.33e-02 |
Pos logit contributions | |
nicely | +0.062 |
aws | +0.060 |
uddle | +0.058 |
Days | +0.057 |
udd | +0.054 |
Neg logit contributions | |
keeping | -0.103 |
outage | -0.093 |
Maria | -0.093 |
fright | -0.092 |
harm | -0.091 |
Token | . |
Token ID | 13 |
Feature activation | +1.18e-02 |
Pos logit contributions | |
nicely | +0.030 |
fair | +0.029 |
uddle | +0.029 |
aws | +0.028 |
favorite | +0.027 |
Neg logit contributions | |
keeping | -0.046 |
outage | -0.043 |
leaf | -0.043 |
harm | -0.042 |
fright | -0.042 |
Token | Tim |
Token ID | 5045 |
Feature activation | +3.67e-02 |
Pos logit contributions | |
keeping | +0.022 |
outage | +0.020 |
leaf | +0.020 |
Judy | +0.020 |
Maria | +0.020 |
Neg logit contributions | |
nicely | -0.016 |
fair | -0.016 |
uddle | -0.016 |
aws | -0.016 |
favorite | -0.015 |
Token | my |
Token ID | 1820 |
Feature activation | +5.88e-02 |
Pos logit contributions | |
keeping | +0.074 |
leaf | +0.071 |
outage | +0.071 |
fright | +0.068 |
eway | +0.068 |
Neg logit contributions | |
fair | -0.049 |
nicely | -0.048 |
aws | -0.045 |
uddle | -0.045 |
favorite | -0.045 |
Token | many |
Token ID | 867 |
Feature activation | +3.81e-03 |
Pos logit contributions | |
nicely | +0.028 |
uddle | +0.028 |
aws | +0.027 |
fair | +0.027 |
favorite | +0.026 |
Neg logit contributions | |
keeping | -0.035 |
leaf | -0.033 |
outage | -0.033 |
fright | -0.032 |
harm | -0.032 |
Token | guitars |
Token ID | 32497 |
Feature activation | +5.63e-02 |
Pos logit contributions | |
keeping | +0.007 |
outage | +0.007 |
leaf | +0.007 |
harm | +0.007 |
fright | +0.007 |
Neg logit contributions | |
nicely | -0.005 |
uddle | -0.005 |
fair | -0.005 |
aws | -0.005 |
favorite | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +3.68e-02 |
Pos logit contributions | |
keeping | +0.121 |
outage | +0.114 |
leaf | +0.113 |
fright | +0.111 |
harm | +0.111 |
Neg logit contributions | |
nicely | -0.063 |
fair | -0.061 |
uddle | -0.060 |
aws | -0.058 |
favorite | -0.056 |
Token | he |
Token ID | 339 |
Feature activation | -1.19e-01 |
Pos logit contributions | |
keeping | +0.068 |
outage | +0.063 |
leaf | +0.062 |
fly | +0.061 |
fright | +0.061 |
Neg logit contributions | |
fair | -0.053 |
nicely | -0.053 |
uddle | -0.050 |
aws | -0.050 |
favorite | -0.049 |
Token | didn |
Token ID | 1422 |
Feature activation | +8.11e-02 |
Pos logit contributions | |
nicely | +0.192 |
fair | +0.187 |
uddle | +0.187 |
aws | +0.183 |
favorite | +0.177 |
Neg logit contributions | |
keeping | -0.195 |
outage | -0.181 |
leaf | -0.180 |
fright | -0.176 |
harm | -0.175 |
Token | 't |
Token ID | 470 |
Feature activation | +1.19e-01 |
Pos logit contributions | |
keeping | +0.119 |
outage | +0.105 |
leaf | +0.105 |
fly | +0.103 |
Judy | +0.102 |
Neg logit contributions | |
nicely | -0.150 |
fair | -0.146 |
uddle | -0.143 |
aws | -0.143 |
favorite | -0.139 |
Token | know |
Token ID | 760 |
Feature activation | +5.82e-02 |
Pos logit contributions | |
keeping | +0.166 |
fly | +0.149 |
eway | +0.144 |
leaf | +0.142 |
Judy | +0.142 |
Neg logit contributions | |
fair | -0.233 |
nicely | -0.229 |
nice | -0.217 |
favorite | -0.211 |
aws | -0.207 |
Token | which |
Token ID | 543 |
Feature activation | -6.60e-03 |
Pos logit contributions | |
keeping | +0.089 |
outage | +0.088 |
harm | +0.087 |
icated | +0.086 |
fright | +0.085 |
Neg logit contributions | |
uddle | -0.094 |
nicely | -0.087 |
aws | -0.087 |
fair | -0.086 |
cart | -0.084 |
Token | one |
Token ID | 530 |
Feature activation | -1.07e-01 |
Pos logit contributions | |
fair | +0.007 |
nicely | +0.007 |
favorite | +0.006 |
nice | +0.006 |
uddle | +0.006 |
Neg logit contributions | |
keeping | -0.016 |
leaf | -0.014 |
outage | -0.014 |
Judy | -0.014 |
fright | -0.014 |
Token | to |
Token ID | 284 |
Feature activation | +8.41e-02 |
Pos logit contributions | |
fair | +0.140 |
nicely | +0.139 |
favorite | +0.134 |
nice | +0.124 |
cart | +0.120 |
Neg logit contributions | |
keeping | -0.238 |
leaf | -0.208 |
fright | -0.201 |
rated | -0.200 |
outage | -0.198 |
Token | choose |
Token ID | 3853 |
Feature activation | -6.87e-02 |
Pos logit contributions | |
keeping | +0.152 |
outage | +0.135 |
fly | +0.132 |
leaf | +0.132 |
Judy | +0.129 |
Neg logit contributions | |
nicely | -0.135 |
fair | -0.135 |
aws | -0.125 |
nice | -0.124 |
favorite | -0.122 |
Token | . |
Token ID | 13 |
Feature activation | +1.51e-02 |
Pos logit contributions | |
keeping | +0.106 |
leaf | +0.104 |
outage | +0.102 |
eway | +0.098 |
fright | +0.097 |
Neg logit contributions | |
nicely | -0.042 |
fair | -0.041 |
nice | -0.037 |
favorite | -0.036 |
aws | -0.036 |
Token | Tim |
Token ID | 5045 |
Feature activation | +4.40e-02 |
Pos logit contributions | |
keeping | +0.023 |
Maria | +0.022 |
Judy | +0.022 |
fright | +0.022 |
outage | +0.021 |
Neg logit contributions | |
uddle | -0.025 |
nicely | -0.025 |
fair | -0.024 |
aws | -0.024 |
favorite | -0.023 |
Token | liked |
Token ID | 8288 |
Feature activation | +3.40e-02 |
Pos logit contributions | |
keeping | +0.094 |
leaf | +0.090 |
outage | +0.089 |
fright | +0.087 |
Judy | +0.085 |
Neg logit contributions | |
fair | -0.053 |
nicely | -0.053 |
favorite | -0.049 |
aws | -0.048 |
uddle | -0.047 |
Token | to |
Token ID | 284 |
Feature activation | +8.63e-02 |
Pos logit contributions | |
keeping | +0.064 |
leaf | +0.060 |
outage | +0.059 |
fright | +0.058 |
harm | +0.058 |
Neg logit contributions | |
uddle | -0.048 |
nicely | -0.047 |
aws | -0.046 |
fair | -0.043 |
udd | -0.042 |
Token | joke |
Token ID | 9707 |
Feature activation | +1.37e-02 |
Pos logit contributions | |
keeping | +0.174 |
outage | +0.162 |
leaf | +0.161 |
fright | +0.158 |
harm | +0.157 |
Neg logit contributions | |
nicely | -0.110 |
fair | -0.107 |
uddle | -0.105 |
aws | -0.102 |
favorite | -0.100 |
Token | and |
Token ID | 290 |
Feature activation | +3.91e-02 |
Pos logit contributions | |
keeping | +0.026 |
icated | +0.026 |
leaf | +0.026 |
ull | +0.026 |
fright | +0.025 |
Neg logit contributions | |
uddle | -0.015 |
sandbox | -0.015 |
aws | -0.015 |
nicely | -0.014 |
fair | -0.014 |
Token | play |
Token ID | 711 |
Feature activation | -6.11e-02 |
Pos logit contributions | |
keeping | +0.078 |
leaf | +0.072 |
outage | +0.072 |
eway | +0.069 |
tal | +0.069 |
Neg logit contributions | |
nicely | -0.055 |
fair | -0.055 |
favorite | -0.051 |
sandbox | -0.050 |
nice | -0.049 |
Token | with |
Token ID | 351 |
Feature activation | +1.52e-02 |
Pos logit contributions | |
aws | +0.089 |
udd | +0.084 |
uddle | +0.084 |
salon | +0.081 |
cart | +0.081 |
Neg logit contributions | |
e | -0.100 |
fright | -0.094 |
ull | -0.090 |
harm | -0.088 |
grant | -0.087 |
Token | his |
Token ID | 465 |
Feature activation | -6.10e-02 |
Pos logit contributions | |
keeping | +0.029 |
fright | +0.027 |
leaf | +0.027 |
harm | +0.027 |
ull | +0.027 |
Neg logit contributions | |
uddle | -0.020 |
nicely | -0.020 |
aws | -0.019 |
fair | -0.019 |
favorite | -0.018 |
Token | friends |
Token ID | 2460 |
Feature activation | +7.05e-02 |
Pos logit contributions | |
nicely | +0.051 |
fair | +0.049 |
uddle | +0.049 |
aws | +0.046 |
favorite | +0.046 |
Neg logit contributions | |
keeping | -0.150 |
outage | -0.143 |
leaf | -0.142 |
fright | -0.139 |
harm | -0.139 |
Token | . |
Token ID | 13 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
keeping | +0.111 |
harm | +0.106 |
e | +0.106 |
fly | +0.105 |
fright | +0.103 |
Neg logit contributions | |
uddle | -0.117 |
aws | -0.110 |
udd | -0.104 |
fair | -0.104 |
sandbox | -0.103 |
Token | . |
Token ID | 13 |
Feature activation | +8.51e-03 |
Pos logit contributions | |
uddle | +0.089 |
nicely | +0.088 |
fair | +0.087 |
aws | +0.086 |
favorite | +0.084 |
Neg logit contributions | |
keeping | -0.130 |
outage | -0.119 |
harm | -0.117 |
fright | -0.116 |
leaf | -0.116 |
Token | He |
Token ID | 679 |
Feature activation | -4.33e-02 |
Pos logit contributions | |
keeping | +0.019 |
outage | +0.018 |
leaf | +0.018 |
fright | +0.018 |
harm | +0.018 |
Neg logit contributions | |
nicely | -0.008 |
fair | -0.008 |
uddle | -0.008 |
aws | -0.008 |
favorite | -0.008 |
Token | sees |
Token ID | 7224 |
Feature activation | -2.13e-02 |
Pos logit contributions | |
nicely | +0.070 |
fair | +0.068 |
uddle | +0.068 |
aws | +0.067 |
favorite | +0.065 |
Neg logit contributions | |
keeping | -0.073 |
outage | -0.067 |
leaf | -0.066 |
fright | -0.064 |
fly | -0.064 |
Token | the |
Token ID | 262 |
Feature activation | -2.01e-02 |
Pos logit contributions | |
nicely | +0.028 |
fair | +0.028 |
aws | +0.027 |
uddle | +0.027 |
favorite | +0.026 |
Neg logit contributions | |
keeping | -0.042 |
outage | -0.040 |
leaf | -0.039 |
fly | -0.038 |
fright | -0.038 |
Token | damage |
Token ID | 2465 |
Feature activation | +1.98e-02 |
Pos logit contributions | |
nicely | +0.019 |
fair | +0.018 |
uddle | +0.018 |
aws | +0.017 |
favorite | +0.017 |
Neg logit contributions | |
keeping | -0.048 |
outage | -0.045 |
leaf | -0.045 |
Judy | -0.044 |
fly | -0.044 |
Token | and |
Token ID | 290 |
Feature activation | +4.38e-02 |
Pos logit contributions | |
keeping | +0.046 |
leaf | +0.044 |
outage | +0.044 |
Judy | +0.043 |
fly | +0.043 |
Neg logit contributions | |
nicely | -0.020 |
fair | -0.019 |
aws | -0.017 |
cart | -0.017 |
uddle | -0.017 |
Token | feels |
Token ID | 5300 |
Feature activation | -2.33e-02 |
Pos logit contributions | |
keeping | +0.088 |
outage | +0.086 |
leaf | +0.086 |
jo | +0.085 |
fly | +0.084 |
Neg logit contributions | |
fair | -0.056 |
nicely | -0.055 |
sandbox | -0.051 |
cooler | -0.050 |
cart | -0.049 |
Token | sad |
Token ID | 6507 |
Feature activation | +2.43e-03 |
Pos logit contributions | |
nicely | +0.025 |
uddle | +0.025 |
fair | +0.025 |
aws | +0.024 |
favorite | +0.023 |
Neg logit contributions | |
keeping | -0.050 |
outage | -0.047 |
leaf | -0.047 |
fright | -0.046 |
harm | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | +8.67e-03 |
Pos logit contributions | |
keeping | +0.004 |
outage | +0.004 |
harm | +0.004 |
fright | +0.004 |
fly | +0.003 |
Neg logit contributions | |
uddle | -0.004 |
aws | -0.004 |
nicely | -0.004 |
fair | -0.004 |
cart | -0.004 |
Token | He |
Token ID | 679 |
Feature activation | -4.28e-02 |
Pos logit contributions | |
keeping | +0.020 |
outage | +0.018 |
leaf | +0.018 |
fright | +0.018 |
Judy | +0.018 |
Neg logit contributions | |
nicely | -0.009 |
fair | -0.009 |
uddle | -0.009 |
aws | -0.008 |
favorite | -0.008 |
Token | loves |
Token ID | 10408 |
Feature activation | +3.02e-02 |
Pos logit contributions | |
fair | +0.070 |
nicely | +0.070 |
uddle | +0.069 |
aws | +0.068 |
favorite | +0.068 |
Neg logit contributions | |
keeping | -0.075 |
outage | -0.067 |
Judy | -0.064 |
leaf | -0.064 |
fly | -0.063 |
Token | wall |
Token ID | 3355 |
Feature activation | +3.98e-02 |
Pos logit contributions | |
fair | +0.092 |
nicely | +0.085 |
nice | +0.081 |
favorite | +0.080 |
gym | +0.077 |
Neg logit contributions | |
keeping | -0.136 |
leaf | -0.134 |
outage | -0.124 |
rum | -0.122 |
Judy | -0.122 |
Token | . |
Token ID | 13 |
Feature activation | +8.31e-03 |
Pos logit contributions | |
keeping | +0.084 |
leaf | +0.080 |
outage | +0.075 |
fright | +0.075 |
Judy | +0.074 |
Neg logit contributions | |
nicely | -0.051 |
fair | -0.049 |
uddle | -0.045 |
favorite | -0.044 |
nice | -0.043 |
Token | â |
Token ID | 22940 |
Feature activation | +7.53e-02 |
Pos logit contributions | |
keeping | +0.018 |
outage | +0.017 |
harm | +0.017 |
fly | +0.016 |
leaf | +0.016 |
Neg logit contributions | |
nicely | -0.010 |
fair | -0.009 |
aws | -0.009 |
uddle | -0.009 |
cart | -0.008 |
Token | € |
Token ID | 26391 |
Feature activation | +1.19e-02 |
Pos logit contributions | |
keeping | +0.113 |
outage | +0.107 |
fright | +0.103 |
harm | +0.101 |
leaf | +0.101 |
Neg logit contributions | |
nicely | -0.137 |
fair | -0.134 |
uddle | -0.125 |
aws | -0.123 |
nice | -0.119 |
Token | Tim |
Token ID | 5045 |
Feature activation | +4.27e-02 |
Pos logit contributions | |
keeping | +0.021 |
leaf | +0.021 |
outage | +0.020 |
harm | +0.020 |
fly | +0.019 |
Neg logit contributions | |
nicely | -0.018 |
uddle | -0.017 |
fair | -0.017 |
aws | -0.016 |
favorite | -0.016 |
Token | thought |
Token ID | 1807 |
Feature activation | +5.38e-02 |
Pos logit contributions | |
keeping | +0.089 |
outage | +0.084 |
leaf | +0.081 |
eway | +0.080 |
harm | +0.078 |
Neg logit contributions | |
nicely | -0.056 |
fair | -0.056 |
uddle | -0.053 |
aws | -0.051 |
favorite | -0.051 |
Token | that |
Token ID | 326 |
Feature activation | +1.70e-04 |
Pos logit contributions | |
keeping | +0.112 |
fly | +0.102 |
leaf | +0.101 |
outage | +0.100 |
fright | +0.098 |
Neg logit contributions | |
nicely | -0.072 |
fair | -0.071 |
favorite | -0.065 |
uddle | -0.065 |
aws | -0.064 |
Token | was |
Token ID | 373 |
Feature activation | +1.55e-03 |
Pos logit contributions | |
keeping | +0.000 |
leaf | +0.000 |
outage | +0.000 |
fly | +0.000 |
fright | +0.000 |
Neg logit contributions | |
fair | -0.000 |
nicely | -0.000 |
best | -0.000 |
nice | -0.000 |
gym | -0.000 |
Token | a |
Token ID | 257 |
Feature activation | +3.54e-02 |
Pos logit contributions | |
keeping | +0.004 |
outage | +0.003 |
leaf | +0.003 |
jo | +0.003 |
fly | +0.003 |
Neg logit contributions | |
fair | -0.002 |
nicely | -0.002 |
nice | -0.002 |
favorite | -0.002 |
cooler | -0.002 |
Token | great |
Token ID | 1049 |
Feature activation | -1.02e-02 |
Pos logit contributions | |
keeping | +0.086 |
outage | +0.082 |
leaf | +0.082 |
fright | +0.081 |
harm | +0.081 |
Neg logit contributions | |
nicely | -0.028 |
uddle | -0.027 |
fair | -0.026 |
aws | -0.026 |
favorite | -0.024 |
Token | idea |
Token ID | 2126 |
Feature activation | +3.72e-02 |
Pos logit contributions | |
nicely | +0.016 |
fair | +0.016 |
uddle | +0.015 |
favorite | +0.015 |
aws | +0.015 |
Neg logit contributions | |
keeping | -0.018 |
leaf | -0.016 |
outage | -0.016 |
fly | -0.016 |
fright | -0.015 |
Token | came |
Token ID | 1625 |
Feature activation | -7.38e-02 |
Pos logit contributions | |
keeping | +0.113 |
outage | +0.102 |
leaf | +0.100 |
eway | +0.099 |
tal | +0.098 |
Neg logit contributions | |
fair | -0.060 |
nicely | -0.059 |
favorite | -0.052 |
nice | -0.052 |
gym | -0.052 |
Token | up |
Token ID | 510 |
Feature activation | -1.01e-01 |
Pos logit contributions | |
fair | +0.067 |
nicely | +0.064 |
nice | +0.056 |
aws | +0.055 |
favorite | +0.052 |
Neg logit contributions | |
keeping | -0.178 |
outage | -0.175 |
leaf | -0.171 |
tal | -0.169 |
grant | -0.166 |
Token | to |
Token ID | 284 |
Feature activation | +8.94e-02 |
Pos logit contributions | |
nicely | +0.088 |
fair | +0.086 |
favorite | +0.069 |
nice | +0.068 |
gym | +0.067 |
Neg logit contributions | |
keeping | -0.255 |
outage | -0.242 |
leaf | -0.238 |
tal | -0.232 |
rum | -0.231 |
Token | them |
Token ID | 606 |
Feature activation | -4.91e-02 |
Pos logit contributions | |
outage | +0.214 |
keeping | +0.211 |
leaf | +0.199 |
fly | +0.199 |
eway | +0.197 |
Neg logit contributions | |
nicely | -0.090 |
fair | -0.090 |
nice | -0.077 |
favorite | -0.070 |
aws | -0.069 |
Token | and |
Token ID | 290 |
Feature activation | +4.07e-02 |
Pos logit contributions | |
nicely | +0.054 |
fair | +0.052 |
uddle | +0.050 |
aws | +0.049 |
favorite | +0.048 |
Neg logit contributions | |
keeping | -0.108 |
leaf | -0.102 |
outage | -0.102 |
fright | -0.099 |
harm | -0.099 |
Token | asked |
Token ID | 1965 |
Feature activation | +1.94e-02 |
Pos logit contributions | |
keeping | +0.080 |
outage | +0.071 |
eway | +0.070 |
ull | +0.069 |
rum | +0.069 |
Neg logit contributions | |
fair | -0.060 |
nicely | -0.058 |
favorite | -0.054 |
nice | -0.054 |
cooler | -0.053 |
Token | for |
Token ID | 329 |
Feature activation | +5.85e-02 |
Pos logit contributions | |
keeping | +0.033 |
Maria | +0.033 |
harm | +0.032 |
Judy | +0.032 |
fright | +0.032 |
Neg logit contributions | |
uddle | -0.030 |
aws | -0.026 |
sandbox | -0.026 |
udd | -0.025 |
favorite | -0.024 |
Token | directions |
Token ID | 11678 |
Feature activation | -1.28e-03 |
Pos logit contributions | |
keeping | +0.101 |
leaf | +0.094 |
fright | +0.093 |
harm | +0.093 |
outage | +0.092 |
Neg logit contributions | |
uddle | -0.090 |
nicely | -0.087 |
aws | -0.086 |
fair | -0.083 |
cart | -0.081 |
Token | . |
Token ID | 13 |
Feature activation | +1.76e-02 |
Pos logit contributions | |
nicely | +0.002 |
fair | +0.002 |
uddle | +0.002 |
aws | +0.002 |
favorite | +0.001 |
Neg logit contributions | |
keeping | -0.003 |
outage | -0.002 |
leaf | -0.002 |
fright | -0.002 |
harm | -0.002 |
Token | Lily |
Token ID | 20037 |
Feature activation | +3.42e-02 |
Pos logit contributions | |
Maria | +0.027 |
keeping | +0.027 |
fright | +0.025 |
grant | +0.025 |
Judy | +0.025 |
Neg logit contributions | |
aws | -0.028 |
uddle | -0.028 |
udd | -0.026 |
best | -0.026 |
sandbox | -0.025 |
Token | 's |
Token ID | 338 |
Feature activation | -4.72e-02 |
Pos logit contributions | |
keeping | +0.054 |
outage | +0.049 |
leaf | +0.049 |
fright | +0.048 |
fly | +0.048 |
Neg logit contributions | |
nicely | -0.057 |
fair | -0.056 |
uddle | -0.056 |
aws | -0.055 |
favorite | -0.053 |
Token | He |
Token ID | 679 |
Feature activation | -4.23e-02 |
Pos logit contributions | |
keeping | +0.044 |
outage | +0.043 |
harm | +0.042 |
leaf | +0.042 |
fright | +0.040 |
Neg logit contributions | |
nicely | -0.020 |
aws | -0.018 |
fair | -0.018 |
gym | -0.017 |
favorite | -0.017 |
Token | guessed |
Token ID | 25183 |
Feature activation | +1.03e-02 |
Pos logit contributions | |
nicely | +0.059 |
fair | +0.058 |
favorite | +0.055 |
uddle | +0.053 |
cooler | +0.052 |
Neg logit contributions | |
keeping | -0.087 |
outage | -0.080 |
leaf | -0.078 |
icated | -0.075 |
harm | -0.073 |
Token | it |
Token ID | 340 |
Feature activation | -5.17e-02 |
Pos logit contributions | |
keeping | +0.023 |
outage | +0.021 |
leaf | +0.021 |
fright | +0.020 |
harm | +0.020 |
Neg logit contributions | |
nicely | -0.012 |
fair | -0.011 |
aws | -0.011 |
uddle | -0.010 |
favorite | -0.010 |
Token | might |
Token ID | 1244 |
Feature activation | +1.03e-01 |
Pos logit contributions | |
nicely | +0.070 |
fair | +0.068 |
favorite | +0.066 |
aws | +0.062 |
nice | +0.062 |
Neg logit contributions | |
keeping | -0.107 |
leaf | -0.102 |
outage | -0.098 |
fright | -0.093 |
harm | -0.093 |
Token | be |
Token ID | 307 |
Feature activation | -6.11e-02 |
Pos logit contributions | |
keeping | +0.240 |
leaf | +0.238 |
jo | +0.213 |
icated | +0.212 |
ez | +0.209 |
Neg logit contributions | |
fair | -0.116 |
nicely | -0.114 |
nice | -0.107 |
favorite | -0.096 |
cooler | -0.092 |
Token | an |
Token ID | 281 |
Feature activation | +7.15e-02 |
Pos logit contributions | |
fair | +0.078 |
nicely | +0.072 |
nice | +0.070 |
favorite | +0.065 |
cooler | +0.064 |
Neg logit contributions | |
keeping | -0.134 |
leaf | -0.131 |
outage | -0.127 |
jo | -0.121 |
Warning | -0.119 |
Token | animal |
Token ID | 5044 |
Feature activation | +6.68e-02 |
Pos logit contributions | |
keeping | +0.139 |
leaf | +0.129 |
outage | +0.128 |
Judy | +0.124 |
fright | +0.124 |
Neg logit contributions | |
fair | -0.099 |
nicely | -0.098 |
favorite | -0.091 |
uddle | -0.089 |
aws | -0.089 |
Token | and |
Token ID | 290 |
Feature activation | +4.23e-02 |
Pos logit contributions | |
keeping | +0.157 |
leaf | +0.152 |
outage | +0.144 |
icated | +0.144 |
Judy | +0.143 |
Neg logit contributions | |
nicely | -0.067 |
fair | -0.065 |
favorite | -0.057 |
uddle | -0.056 |
nice | -0.056 |
Token | he |
Token ID | 339 |
Feature activation | -1.14e-01 |
Pos logit contributions | |
keeping | +0.093 |
leaf | +0.086 |
outage | +0.085 |
jo | +0.083 |
rum | +0.083 |
Neg logit contributions | |
fair | -0.053 |
nicely | -0.051 |
nice | -0.048 |
favorite | -0.047 |
cooler | -0.046 |
Token | got |
Token ID | 1392 |
Feature activation | -1.19e-01 |
Pos logit contributions | |
nicely | +0.151 |
fair | +0.149 |
favorite | +0.148 |
nice | +0.135 |
cooler | +0.128 |
Neg logit contributions | |
keeping | -0.245 |
outage | -0.224 |
leaf | -0.221 |
icated | -0.220 |
Warning | -0.210 |
Token | ready |
Token ID | 3492 |
Feature activation | -3.29e-02 |
Pos logit contributions | |
nicely | +0.129 |
fair | +0.128 |
nice | +0.121 |
favorite | +0.111 |
cooler | +0.107 |
Neg logit contributions | |
outage | -0.275 |
keeping | -0.272 |
jo | -0.266 |
icated | -0.258 |
rated | -0.254 |
Token | end |
Token ID | 886 |
Feature activation | -1.23e-01 |
Pos logit contributions | |
harm | +0.048 |
fright | +0.048 |
keeping | +0.048 |
outage | +0.047 |
leaf | +0.046 |
Neg logit contributions | |
uddle | -0.045 |
nicely | -0.044 |
aws | -0.043 |
fair | -0.041 |
favorite | -0.039 |
Token | . |
Token ID | 13 |
Feature activation | +6.51e-03 |
Pos logit contributions | |
nicely | +0.119 |
fair | +0.094 |
uddle | +0.092 |
nice | +0.089 |
favorite | +0.082 |
Neg logit contributions | |
keeping | -0.311 |
leaf | -0.293 |
Judy | -0.287 |
icated | -0.282 |
rated | -0.277 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
keeping | +0.017 |
outage | +0.016 |
rated | +0.016 |
leaf | +0.016 |
eway | +0.016 |
Neg logit contributions | |
nicely | -0.005 |
aws | -0.004 |
fair | -0.004 |
favorite | -0.004 |
nice | -0.004 |
Token | Once |
Token ID | 7454 |
Feature activation | -5.18e-03 |
Pos logit contributions | |
outage | +0.266 |
leaf | +0.263 |
keeping | +0.259 |
fly | +0.258 |
harm | +0.253 |
Neg logit contributions | |
nicely | -0.083 |
aws | -0.075 |
fair | -0.070 |
gym | -0.066 |
nice | -0.062 |
Token | there |
Token ID | 612 |
Feature activation | +2.54e-03 |
Pos logit contributions | |
nicely | +0.008 |
fair | +0.008 |
uddle | +0.008 |
aws | +0.008 |
favorite | +0.007 |
Neg logit contributions | |
keeping | -0.009 |
outage | -0.008 |
leaf | -0.008 |
fright | -0.008 |
harm | -0.008 |
Token | was |
Token ID | 373 |
Feature activation | +2.46e-03 |
Pos logit contributions | |
keeping | +0.005 |
outage | +0.004 |
leaf | +0.004 |
harm | +0.004 |
fly | +0.004 |
Neg logit contributions | |
nicely | -0.004 |
fair | -0.004 |
uddle | -0.004 |
aws | -0.004 |
favorite | -0.004 |
Token | a |
Token ID | 257 |
Feature activation | +4.02e-02 |
Pos logit contributions | |
keeping | +0.005 |
outage | +0.005 |
leaf | +0.005 |
fly | +0.005 |
harm | +0.005 |
Neg logit contributions | |
fair | -0.003 |
nicely | -0.003 |
favorite | -0.003 |
aws | -0.003 |
uddle | -0.003 |
Token | nos |
Token ID | 43630 |
Feature activation | +8.14e-02 |
Pos logit contributions | |
ull | +0.066 |
Fairy | +0.066 |
fly | +0.066 |
fright | +0.065 |
leaf | +0.064 |
Neg logit contributions | |
aws | -0.057 |
uddle | -0.049 |
nicely | -0.049 |
udd | -0.046 |
pit | -0.045 |
Token | y |
Token ID | 88 |
Feature activation | -3.06e-02 |
Pos logit contributions | |
keeping | +0.173 |
fright | +0.158 |
leaf | +0.157 |
grant | +0.156 |
eway | +0.152 |
Neg logit contributions | |
fair | -0.096 |
nicely | -0.096 |
aws | -0.096 |
favorite | -0.088 |
uddle | -0.088 |
Token | little |
Token ID | 1310 |
Feature activation | +8.74e-02 |
Pos logit contributions | |
aws | +0.042 |
nicely | +0.042 |
uddle | +0.041 |
fair | +0.040 |
favorite | +0.039 |
Neg logit contributions | |
keeping | -0.054 |
fly | -0.052 |
outage | -0.052 |
leaf | -0.051 |
harm | -0.050 |
Token | girl |
Token ID | 2576 |
Feature activation | +8.03e-02 |
Pos logit contributions | |
fly | +0.132 |
outage | +0.128 |
keeping | +0.124 |
fright | +0.124 |
leaf | +0.124 |
Neg logit contributions | |
nicely | -0.142 |
aws | -0.142 |
Days | -0.134 |
uddle | -0.134 |
fair | -0.126 |
Token | to |
Token ID | 284 |
Feature activation | +8.23e-02 |
Pos logit contributions | |
nicely | +0.045 |
fair | +0.043 |
nice | +0.039 |
favorite | +0.037 |
hockey | +0.034 |
Neg logit contributions | |
keeping | -0.117 |
fly | -0.111 |
outage | -0.107 |
Fairy | -0.107 |
fright | -0.106 |
Token | remember |
Token ID | 3505 |
Feature activation | +5.49e-03 |
Pos logit contributions | |
keeping | +0.143 |
leaf | +0.130 |
outage | +0.129 |
Judy | +0.125 |
fly | +0.124 |
Neg logit contributions | |
nicely | -0.133 |
fair | -0.129 |
uddle | -0.119 |
favorite | -0.118 |
nice | -0.118 |
Token | all |
Token ID | 477 |
Feature activation | +7.56e-03 |
Pos logit contributions | |
keeping | +0.011 |
leaf | +0.011 |
outage | +0.011 |
fly | +0.010 |
fright | +0.010 |
Neg logit contributions | |
nicely | -0.007 |
fair | -0.007 |
favorite | -0.006 |
nice | -0.006 |
aws | -0.006 |
Token | the |
Token ID | 262 |
Feature activation | -2.71e-02 |
Pos logit contributions | |
keeping | +0.018 |
fly | +0.017 |
leaf | +0.016 |
grant | +0.016 |
Fairy | +0.016 |
Neg logit contributions | |
fair | -0.008 |
nicely | -0.008 |
nice | -0.008 |
new | -0.007 |
hockey | -0.007 |
Token | things |
Token ID | 1243 |
Feature activation | -1.47e-02 |
Pos logit contributions | |
fair | +0.031 |
nicely | +0.030 |
favorite | +0.028 |
uddle | +0.028 |
nice | +0.027 |
Neg logit contributions | |
keeping | -0.059 |
fly | -0.055 |
outage | -0.055 |
leaf | -0.055 |
Judy | -0.054 |
Token | that |
Token ID | 326 |
Feature activation | -4.94e-03 |
Pos logit contributions | |
nicely | +0.015 |
fair | +0.014 |
uddle | +0.013 |
favorite | +0.013 |
aws | +0.013 |
Neg logit contributions | |
keeping | -0.034 |
outage | -0.032 |
leaf | -0.032 |
fright | -0.031 |
fly | -0.031 |
Token | he |
Token ID | 339 |
Feature activation | -1.16e-01 |
Pos logit contributions | |
fair | +0.007 |
nicely | +0.007 |
favorite | +0.006 |
nice | +0.006 |
gym | +0.006 |
Neg logit contributions | |
keeping | -0.010 |
leaf | -0.009 |
outage | -0.009 |
rum | -0.009 |
fly | -0.009 |
Token | had |
Token ID | 550 |
Feature activation | +2.97e-02 |
Pos logit contributions | |
nicely | +0.156 |
fair | +0.155 |
favorite | +0.146 |
nice | +0.136 |
aws | +0.129 |
Neg logit contributions | |
keeping | -0.243 |
outage | -0.225 |
eway | -0.224 |
Judy | -0.214 |
rum | -0.213 |
Token | to |
Token ID | 284 |
Feature activation | +8.12e-02 |
Pos logit contributions | |
keeping | +0.050 |
fly | +0.049 |
outage | +0.049 |
leaf | +0.047 |
jo | +0.047 |
Neg logit contributions | |
fair | -0.048 |
nicely | -0.048 |
nice | -0.044 |
favorite | -0.044 |
aws | -0.043 |
Token | do |
Token ID | 466 |
Feature activation | +1.65e-02 |
Pos logit contributions | |
keeping | +0.140 |
leaf | +0.132 |
outage | +0.131 |
fly | +0.129 |
jo | +0.125 |
Neg logit contributions | |
nicely | -0.133 |
fair | -0.131 |
nice | -0.121 |
hockey | -0.113 |
favorite | -0.112 |
Token | . |
Token ID | 13 |
Feature activation | +5.87e-03 |
Pos logit contributions | |
keeping | +0.035 |
leaf | +0.033 |
fly | +0.033 |
outage | +0.033 |
fright | +0.032 |
Neg logit contributions | |
nicely | -0.020 |
fair | -0.019 |
favorite | -0.017 |
uddle | -0.017 |
aws | -0.017 |
Token | playing |
Token ID | 2712 |
Feature activation | -1.08e-01 |
Pos logit contributions | |
uddle | +0.021 |
nicely | +0.020 |
aws | +0.018 |
Days | +0.017 |
fair | +0.017 |
Neg logit contributions | |
keeping | -0.048 |
Judy | -0.048 |
Maria | -0.047 |
e | -0.047 |
leaf | -0.047 |
Token | in |
Token ID | 287 |
Feature activation | +5.81e-02 |
Pos logit contributions | |
uddle | +0.139 |
udd | +0.134 |
aws | +0.131 |
Days | +0.129 |
handsome | +0.128 |
Neg logit contributions | |
e | -0.192 |
keeping | -0.184 |
fright | -0.179 |
fly | -0.178 |
ull | -0.178 |
Token | the |
Token ID | 262 |
Feature activation | -1.90e-02 |
Pos logit contributions | |
keeping | +0.111 |
outage | +0.108 |
leaf | +0.107 |
fly | +0.104 |
rated | +0.102 |
Neg logit contributions | |
fair | -0.079 |
nicely | -0.078 |
aws | -0.072 |
gym | -0.072 |
favorite | -0.072 |
Token | park |
Token ID | 3952 |
Feature activation | -6.97e-02 |
Pos logit contributions | |
nicely | +0.025 |
uddle | +0.025 |
Days | +0.024 |
aws | +0.024 |
udd | +0.023 |
Neg logit contributions | |
keeping | -0.032 |
Maria | -0.031 |
Judy | -0.030 |
sorts | -0.030 |
Lou | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | +1.65e-02 |
Pos logit contributions | |
uddle | +0.101 |
fair | +0.095 |
nicely | +0.095 |
aws | +0.094 |
udd | +0.091 |
Neg logit contributions | |
keeping | -0.124 |
fly | -0.112 |
harm | -0.112 |
fright | -0.112 |
outage | -0.111 |
Token | They |
Token ID | 1119 |
Feature activation | -3.74e-02 |
Pos logit contributions | |
Judy | +0.026 |
keeping | +0.025 |
Maria | +0.024 |
ull | +0.023 |
fly | +0.023 |
Neg logit contributions | |
uddle | -0.026 |
fair | -0.026 |
nicely | -0.026 |
handsome | -0.025 |
Days | -0.024 |
Token | saw |
Token ID | 2497 |
Feature activation | -1.69e-02 |
Pos logit contributions | |
nicely | +0.051 |
uddle | +0.050 |
fair | +0.049 |
aws | +0.048 |
favorite | +0.046 |
Neg logit contributions | |
keeping | -0.070 |
leaf | -0.066 |
outage | -0.066 |
fright | -0.065 |
Judy | -0.064 |
Token | a |
Token ID | 257 |
Feature activation | +3.96e-02 |
Pos logit contributions | |
nicely | +0.026 |
uddle | +0.026 |
fair | +0.024 |
aws | +0.023 |
cart | +0.023 |
Neg logit contributions | |
keeping | -0.028 |
leaf | -0.027 |
Judy | -0.026 |
fright | -0.026 |
ull | -0.026 |
Token | big |
Token ID | 1263 |
Feature activation | +8.45e-03 |
Pos logit contributions | |
outage | +0.078 |
fright | +0.078 |
fly | +0.077 |
leaf | +0.075 |
ull | +0.074 |
Neg logit contributions | |
nicely | -0.044 |
aws | -0.042 |
favorite | -0.038 |
udd | -0.037 |
Days | -0.036 |
Token | jet |
Token ID | 12644 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
keeping | +0.015 |
fright | +0.015 |
fly | +0.015 |
outage | +0.014 |
Judy | +0.014 |
Neg logit contributions | |
nicely | -0.012 |
aws | -0.011 |
Days | -0.011 |
favorite | -0.011 |
uddle | -0.010 |
Token | in |
Token ID | 287 |
Feature activation | +5.79e-02 |
Pos logit contributions | |
keeping | +0.033 |
leaf | +0.031 |
outage | +0.031 |
fright | +0.030 |
Judy | +0.030 |
Neg logit contributions | |
nicely | -0.018 |
fair | -0.018 |
uddle | -0.017 |
aws | -0.017 |
favorite | -0.016 |
Token | They |
Token ID | 1119 |
Feature activation | -4.07e-02 |
Pos logit contributions | |
keeping | +0.021 |
outage | +0.020 |
Maria | +0.019 |
Judy | +0.019 |
fly | +0.019 |
Neg logit contributions | |
uddle | -0.016 |
nicely | -0.015 |
fair | -0.015 |
aws | -0.015 |
udd | -0.014 |
Token | decided |
Token ID | 3066 |
Feature activation | -4.56e-02 |
Pos logit contributions | |
nicely | +0.064 |
fair | +0.063 |
favorite | +0.059 |
uddle | +0.059 |
aws | +0.058 |
Neg logit contributions | |
keeping | -0.073 |
outage | -0.067 |
leaf | -0.066 |
harm | -0.063 |
Judy | -0.063 |
Token | to |
Token ID | 284 |
Feature activation | +8.45e-02 |
Pos logit contributions | |
uddle | +0.064 |
aws | +0.062 |
nicely | +0.060 |
fair | +0.058 |
favorite | +0.055 |
Neg logit contributions | |
keeping | -0.087 |
outage | -0.081 |
harm | -0.080 |
fright | -0.079 |
Judy | -0.078 |
Token | call |
Token ID | 869 |
Feature activation | +4.30e-02 |
Pos logit contributions | |
keeping | +0.160 |
leaf | +0.153 |
outage | +0.148 |
jo | +0.143 |
Judy | +0.143 |
Neg logit contributions | |
nicely | -0.127 |
fair | -0.122 |
nice | -0.109 |
uddle | -0.108 |
favorite | -0.108 |
Token | in |
Token ID | 287 |
Feature activation | +5.30e-02 |
Pos logit contributions | |
keeping | +0.104 |
fly | +0.096 |
outage | +0.095 |
leaf | +0.095 |
jo | +0.091 |
Neg logit contributions | |
nicely | -0.050 |
fair | -0.047 |
nice | -0.042 |
favorite | -0.041 |
cart | -0.037 |
Token | the |
Token ID | 262 |
Feature activation | -2.79e-02 |
Pos logit contributions | |
keeping | +0.131 |
fly | +0.124 |
leaf | +0.123 |
outage | +0.121 |
ull | +0.120 |
Neg logit contributions | |
nicely | -0.055 |
fair | -0.051 |
nice | -0.047 |
favorite | -0.045 |
hockey | -0.041 |
Token | roof |
Token ID | 9753 |
Feature activation | -2.72e-02 |
Pos logit contributions | |
nicely | +0.032 |
uddle | +0.031 |
aws | +0.031 |
fair | +0.030 |
favorite | +0.028 |
Neg logit contributions | |
keeping | -0.058 |
outage | -0.055 |
harm | -0.055 |
fright | -0.055 |
leaf | -0.054 |
Token | specialist |
Token ID | 15670 |
Feature activation | +2.49e-02 |
Pos logit contributions | |
nicely | +0.027 |
fair | +0.025 |
uddle | +0.023 |
favorite | +0.023 |
aws | +0.022 |
Neg logit contributions | |
keeping | -0.065 |
leaf | -0.063 |
fright | -0.060 |
harm | -0.059 |
outage | -0.059 |
Token | to |
Token ID | 284 |
Feature activation | +8.42e-02 |
Pos logit contributions | |
keeping | +0.058 |
outage | +0.056 |
leaf | +0.056 |
fright | +0.055 |
ull | +0.055 |
Neg logit contributions | |
nicely | -0.025 |
fair | -0.023 |
aws | -0.021 |
favorite | -0.021 |
cart | -0.020 |
Token | help |
Token ID | 1037 |
Feature activation | +2.92e-02 |
Pos logit contributions | |
keeping | +0.167 |
outage | +0.159 |
jo | +0.157 |
leaf | +0.154 |
fly | +0.154 |
Neg logit contributions | |
nicely | -0.125 |
fair | -0.123 |
nice | -0.113 |
cooler | -0.101 |
hockey | -0.100 |
Token | . |
Token ID | 13 |
Feature activation | +9.84e-03 |
Pos logit contributions | |
keeping | +0.063 |
leaf | +0.062 |
outage | +0.061 |
fly | +0.060 |
icated | +0.057 |
Neg logit contributions | |
nicely | -0.036 |
fair | -0.034 |
nice | -0.031 |
favorite | -0.029 |
cart | -0.028 |
Token | was |
Token ID | 373 |
Feature activation | +1.04e-03 |
Pos logit contributions | |
nicely | +0.002 |
fair | +0.001 |
uddle | +0.001 |
aws | +0.001 |
favorite | +0.001 |
Neg logit contributions | |
keeping | -0.002 |
outage | -0.002 |
leaf | -0.002 |
harm | -0.002 |
fright | -0.002 |
Token | a |
Token ID | 257 |
Feature activation | +3.72e-02 |
Pos logit contributions | |
keeping | +0.002 |
outage | +0.002 |
fly | +0.002 |
harm | +0.002 |
leaf | +0.002 |
Neg logit contributions | |
fair | -0.001 |
nicely | -0.001 |
favorite | -0.001 |
aws | -0.001 |
nice | -0.001 |
Token | boy |
Token ID | 2933 |
Feature activation | +8.39e-02 |
Pos logit contributions | |
ull | +0.062 |
fly | +0.062 |
Fairy | +0.061 |
fright | +0.061 |
leaf | +0.060 |
Neg logit contributions | |
aws | -0.050 |
uddle | -0.046 |
nicely | -0.046 |
udd | -0.042 |
Days | -0.041 |
Token | named |
Token ID | 3706 |
Feature activation | +7.89e-02 |
Pos logit contributions | |
keeping | +0.138 |
outage | +0.128 |
leaf | +0.126 |
harm | +0.124 |
fright | +0.124 |
Neg logit contributions | |
nicely | -0.135 |
fair | -0.133 |
uddle | -0.133 |
aws | -0.130 |
favorite | -0.126 |
Token | Jim |
Token ID | 5395 |
Feature activation | +4.37e-02 |
Pos logit contributions | |
keeping | +0.145 |
outage | +0.135 |
leaf | +0.134 |
fright | +0.132 |
harm | +0.131 |
Neg logit contributions | |
nicely | -0.113 |
fair | -0.111 |
uddle | -0.111 |
aws | -0.108 |
favorite | -0.104 |
Token | . |
Token ID | 13 |
Feature activation | +1.31e-02 |
Pos logit contributions | |
keeping | +0.109 |
leaf | +0.105 |
outage | +0.104 |
fright | +0.099 |
eway | +0.099 |
Neg logit contributions | |
nicely | -0.041 |
fair | -0.038 |
favorite | -0.035 |
nice | -0.034 |
aws | -0.034 |
Token | He |
Token ID | 679 |
Feature activation | -6.04e-02 |
Pos logit contributions | |
keeping | +0.024 |
outage | +0.022 |
leaf | +0.022 |
fright | +0.022 |
harm | +0.022 |
Neg logit contributions | |
nicely | -0.019 |
fair | -0.019 |
uddle | -0.018 |
aws | -0.018 |
favorite | -0.017 |
Token | was |
Token ID | 373 |
Feature activation | +1.73e-03 |
Pos logit contributions | |
nicely | +0.076 |
fair | +0.074 |
uddle | +0.073 |
aws | +0.072 |
favorite | +0.069 |
Neg logit contributions | |
keeping | -0.121 |
outage | -0.114 |
leaf | -0.113 |
fright | -0.111 |
harm | -0.110 |
Token | just |
Token ID | 655 |
Feature activation | +1.03e-01 |
Pos logit contributions | |
keeping | +0.003 |
leaf | +0.003 |
e | +0.003 |
fright | +0.003 |
fly | +0.003 |
Neg logit contributions | |
uddle | -0.003 |
nicely | -0.002 |
aws | -0.002 |
udd | -0.002 |
fair | -0.002 |
Token | three |
Token ID | 1115 |
Feature activation | +1.99e-02 |
Pos logit contributions | |
keeping | +0.216 |
leaf | +0.204 |
outage | +0.204 |
fright | +0.199 |
harm | +0.199 |
Neg logit contributions | |
nicely | -0.119 |
uddle | -0.116 |
fair | -0.113 |
aws | -0.112 |
favorite | -0.105 |
Token | years |
Token ID | 812 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
keeping | +0.020 |
harm | +0.019 |
fright | +0.018 |
e | +0.017 |
rum | +0.017 |
Neg logit contributions | |
nicely | -0.042 |
uddle | -0.041 |
fair | -0.041 |
aws | -0.041 |
sandbox | -0.039 |
Token | door |
Token ID | 3420 |
Feature activation | +2.74e-02 |
Pos logit contributions | |
nicely | +0.029 |
aws | +0.027 |
hockey | +0.026 |
uddle | +0.026 |
fair | +0.025 |
Neg logit contributions | |
keeping | -0.043 |
fright | -0.040 |
outage | -0.040 |
leaf | -0.039 |
Judy | -0.039 |
Token | of |
Token ID | 286 |
Feature activation | +6.48e-02 |
Pos logit contributions | |
keeping | +0.055 |
outage | +0.052 |
leaf | +0.051 |
fright | +0.051 |
Judy | +0.050 |
Neg logit contributions | |
nicely | -0.034 |
uddle | -0.033 |
fair | -0.033 |
aws | -0.032 |
favorite | -0.030 |
Token | the |
Token ID | 262 |
Feature activation | -2.29e-02 |
Pos logit contributions | |
outage | +0.144 |
keeping | +0.141 |
fly | +0.139 |
leaf | +0.137 |
jo | +0.137 |
Neg logit contributions | |
fair | -0.074 |
cart | -0.067 |
cooler | -0.067 |
nicely | -0.066 |
nice | -0.064 |
Token | van |
Token ID | 5719 |
Feature activation | -4.38e-02 |
Pos logit contributions | |
nicely | +0.031 |
aws | +0.030 |
hockey | +0.029 |
uddle | +0.028 |
gym | +0.026 |
Neg logit contributions | |
keeping | -0.044 |
fright | -0.039 |
harm | -0.038 |
outage | -0.038 |
Judy | -0.038 |
Token | and |
Token ID | 290 |
Feature activation | +3.84e-02 |
Pos logit contributions | |
nicely | +0.055 |
fair | +0.054 |
uddle | +0.054 |
aws | +0.053 |
favorite | +0.050 |
Neg logit contributions | |
keeping | -0.088 |
outage | -0.082 |
leaf | -0.080 |
fly | -0.079 |
fright | -0.079 |
Token | got |
Token ID | 1392 |
Feature activation | -1.16e-01 |
Pos logit contributions | |
keeping | +0.076 |
outage | +0.073 |
leaf | +0.070 |
harm | +0.070 |
fright | +0.069 |
Neg logit contributions | |
nicely | -0.051 |
fair | -0.050 |
uddle | -0.047 |
aws | -0.047 |
favorite | -0.047 |
Token | inside |
Token ID | 2641 |
Feature activation | -3.94e-02 |
Pos logit contributions | |
fair | +0.125 |
nicely | +0.124 |
favorite | +0.117 |
nice | +0.110 |
cart | +0.110 |
Neg logit contributions | |
outage | -0.264 |
keeping | -0.262 |
fly | -0.256 |
jo | -0.253 |
harm | -0.248 |
Token | . |
Token ID | 13 |
Feature activation | +1.21e-02 |
Pos logit contributions | |
nicely | +0.035 |
fair | +0.033 |
nice | +0.030 |
new | +0.027 |
favorite | +0.027 |
Neg logit contributions | |
keeping | -0.101 |
fly | -0.098 |
leaf | -0.097 |
outage | -0.096 |
icated | -0.094 |
Token | The |
Token ID | 383 |
Feature activation | +2.97e-02 |
Pos logit contributions | |
keeping | +0.026 |
outage | +0.025 |
leaf | +0.025 |
Judy | +0.024 |
fright | +0.024 |
Neg logit contributions | |
nicely | -0.013 |
uddle | -0.013 |
fair | -0.013 |
aws | -0.012 |
favorite | -0.012 |
Token | driver |
Token ID | 4639 |
Feature activation | +3.18e-02 |
Pos logit contributions | |
keeping | +0.061 |
fright | +0.056 |
Judy | +0.055 |
leaf | +0.055 |
ull | +0.055 |
Neg logit contributions | |
nicely | -0.034 |
aws | -0.034 |
uddle | -0.033 |
hockey | -0.032 |
udd | -0.030 |
Token | was |
Token ID | 373 |
Feature activation | +1.80e-03 |
Pos logit contributions | |
keeping | +0.066 |
outage | +0.060 |
leaf | +0.059 |
fright | +0.058 |
harm | +0.058 |
Neg logit contributions | |
nicely | -0.043 |
fair | -0.042 |
uddle | -0.039 |
favorite | -0.039 |
cart | -0.038 |
Token | cy |
Token ID | 948 |
Feature activation | +1.84e-02 |
Pos logit contributions | |
outage | +0.160 |
eway | +0.150 |
harm | +0.145 |
keeping | +0.145 |
ull | +0.144 |
Neg logit contributions | |
nicely | -0.092 |
aws | -0.091 |
uddle | -0.089 |
fair | -0.087 |
cart | -0.081 |
Token | and |
Token ID | 290 |
Feature activation | +4.07e-02 |
Pos logit contributions | |
keeping | +0.034 |
outage | +0.032 |
leaf | +0.032 |
harm | +0.031 |
fright | +0.031 |
Neg logit contributions | |
nicely | -0.026 |
fair | -0.026 |
uddle | -0.025 |
aws | -0.025 |
favorite | -0.024 |
Token | the |
Token ID | 262 |
Feature activation | -2.20e-02 |
Pos logit contributions | |
outage | +0.095 |
keeping | +0.086 |
leaf | +0.084 |
ull | +0.084 |
harm | +0.081 |
Neg logit contributions | |
fair | -0.049 |
nicely | -0.049 |
favorite | -0.041 |
cart | -0.041 |
gym | -0.040 |
Token | dangerous |
Token ID | 4923 |
Feature activation | +2.07e-02 |
Pos logit contributions | |
fair | +0.036 |
nicely | +0.036 |
uddle | +0.035 |
aws | +0.035 |
favorite | +0.034 |
Neg logit contributions | |
keeping | -0.036 |
outage | -0.033 |
leaf | -0.033 |
fright | -0.032 |
harm | -0.032 |
Token | animal |
Token ID | 5044 |
Feature activation | +7.08e-02 |
Pos logit contributions | |
keeping | +0.031 |
leaf | +0.028 |
outage | +0.027 |
Judy | +0.026 |
icated | +0.026 |
Neg logit contributions | |
fair | -0.039 |
nicely | -0.038 |
favorite | -0.037 |
uddle | -0.037 |
nice | -0.036 |
Token | walked |
Token ID | 6807 |
Feature activation | -8.53e-02 |
Pos logit contributions | |
keeping | +0.134 |
leaf | +0.126 |
outage | +0.126 |
icated | +0.122 |
fright | +0.120 |
Neg logit contributions | |
nicely | -0.100 |
fair | -0.098 |
uddle | -0.095 |
aws | -0.093 |
favorite | -0.092 |
Token | together |
Token ID | 1978 |
Feature activation | -2.95e-02 |
Pos logit contributions | |
uddle | +0.121 |
nicely | +0.116 |
aws | +0.114 |
fair | +0.113 |
favorite | +0.110 |
Neg logit contributions | |
keeping | -0.159 |
leaf | -0.144 |
fright | -0.144 |
Judy | -0.143 |
fly | -0.141 |
Token | , |
Token ID | 11 |
Feature activation | -1.51e-01 |
Pos logit contributions | |
uddle | +0.033 |
aws | +0.030 |
nicely | +0.029 |
fair | +0.029 |
udd | +0.028 |
Neg logit contributions | |
keeping | -0.064 |
fright | -0.059 |
harm | -0.059 |
Judy | -0.059 |
fly | -0.059 |
Token | gathering |
Token ID | 11228 |
Feature activation | -9.59e-02 |
Pos logit contributions | |
nicely | +0.190 |
fair | +0.185 |
uddle | +0.185 |
aws | +0.180 |
favorite | +0.173 |
Neg logit contributions | |
keeping | -0.304 |
outage | -0.283 |
leaf | -0.282 |
fright | -0.277 |
harm | -0.276 |
Token | clues |
Token ID | 20195 |
Feature activation | +2.42e-02 |
Pos logit contributions | |
nicely | +0.118 |
fair | +0.115 |
uddle | +0.113 |
aws | +0.111 |
favorite | +0.107 |
Neg logit contributions | |
keeping | -0.195 |
outage | -0.183 |
leaf | -0.182 |
fright | -0.178 |
harm | -0.178 |
Token | to |
Token ID | 284 |
Feature activation | +8.51e-02 |
Pos logit contributions | |
keeping | +0.052 |
leaf | +0.049 |
outage | +0.049 |
fright | +0.048 |
harm | +0.048 |
Neg logit contributions | |
nicely | -0.027 |
fair | -0.026 |
aws | -0.025 |
uddle | -0.025 |
favorite | -0.024 |
Token | , |
Token ID | 11 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
leaf | +0.008 |
keeping | +0.008 |
osh | +0.008 |
outage | +0.008 |
fly | +0.007 |
Neg logit contributions | |
nicely | -0.004 |
fair | -0.004 |
uddle | -0.003 |
aws | -0.003 |
favorite | -0.003 |
Token | so |
Token ID | 523 |
Feature activation | +3.77e-02 |
Pos logit contributions | |
fair | +0.151 |
nicely | +0.145 |
nice | +0.132 |
cart | +0.128 |
cooler | +0.127 |
Neg logit contributions | |
keeping | -0.341 |
leaf | -0.328 |
outage | -0.321 |
fly | -0.312 |
harm | -0.310 |
Token | she |
Token ID | 673 |
Feature activation | -1.04e-01 |
Pos logit contributions | |
keeping | +0.096 |
outage | +0.093 |
leaf | +0.093 |
fly | +0.089 |
harm | +0.088 |
Neg logit contributions | |
nicely | -0.031 |
fair | -0.031 |
nice | -0.026 |
favorite | -0.025 |
aws | -0.024 |
Token | decided |
Token ID | 3066 |
Feature activation | -4.48e-02 |
Pos logit contributions | |
nicely | +0.143 |
fair | +0.139 |
uddle | +0.130 |
favorite | +0.126 |
aws | +0.124 |
Neg logit contributions | |
keeping | -0.216 |
outage | -0.198 |
leaf | -0.192 |
eway | -0.190 |
harm | -0.189 |
Token | to |
Token ID | 284 |
Feature activation | +8.31e-02 |
Pos logit contributions | |
nicely | +0.051 |
fair | +0.050 |
uddle | +0.049 |
aws | +0.048 |
favorite | +0.046 |
Neg logit contributions | |
keeping | -0.095 |
outage | -0.090 |
leaf | -0.089 |
fright | -0.088 |
harm | -0.087 |
Token | go |
Token ID | 467 |
Feature activation | -1.02e-01 |
Pos logit contributions | |
keeping | +0.158 |
leaf | +0.154 |
outage | +0.154 |
icated | +0.149 |
jo | +0.148 |
Neg logit contributions | |
nicely | -0.119 |
fair | -0.115 |
nice | -0.103 |
gym | -0.101 |
cooler | -0.099 |
Token | on |
Token ID | 319 |
Feature activation | +5.23e-02 |
Pos logit contributions | |
nicely | +0.115 |
fair | +0.112 |
uddle | +0.111 |
aws | +0.110 |
favorite | +0.104 |
Neg logit contributions | |
keeping | -0.219 |
outage | -0.206 |
leaf | -0.205 |
fright | -0.201 |
harm | -0.201 |
Token | a |
Token ID | 257 |
Feature activation | +3.13e-02 |
Pos logit contributions | |
keeping | +0.104 |
outage | +0.100 |
fly | +0.099 |
leaf | +0.098 |
ull | +0.096 |
Neg logit contributions | |
nicely | -0.069 |
fair | -0.068 |
favorite | -0.063 |
aws | -0.063 |
uddle | -0.062 |
Token | search |
Token ID | 2989 |
Feature activation | -5.07e-02 |
Pos logit contributions | |
fright | +0.052 |
outage | +0.051 |
harm | +0.050 |
keeping | +0.049 |
Maria | +0.047 |
Neg logit contributions | |
aws | -0.049 |
nicely | -0.046 |
uddle | -0.045 |
udd | -0.043 |
favorite | -0.040 |
Token | for |
Token ID | 329 |
Feature activation | +4.94e-02 |
Pos logit contributions | |
nicely | +0.069 |
fair | +0.066 |
uddle | +0.061 |
nice | +0.060 |
favorite | +0.060 |
Neg logit contributions | |
keeping | -0.101 |
leaf | -0.096 |
fly | -0.095 |
outage | -0.094 |
fright | -0.092 |
Token | it |
Token ID | 340 |
Feature activation | -6.02e-02 |
Pos logit contributions | |
keeping | +0.102 |
outage | +0.098 |
leaf | +0.096 |
fly | +0.095 |
fright | +0.093 |
Neg logit contributions | |
nicely | -0.061 |
fair | -0.060 |
uddle | -0.056 |
aws | -0.056 |
favorite | -0.055 |
Token | she |
Token ID | 673 |
Feature activation | -1.02e-01 |
Pos logit contributions | |
fair | +0.009 |
nicely | +0.009 |
nice | +0.008 |
hockey | +0.007 |
favorite | +0.007 |
Neg logit contributions | |
keeping | -0.020 |
leaf | -0.020 |
outage | -0.019 |
rum | -0.018 |
fly | -0.018 |
Token | was |
Token ID | 373 |
Feature activation | +2.07e-03 |
Pos logit contributions | |
nicely | +0.152 |
fair | +0.149 |
uddle | +0.147 |
aws | +0.143 |
favorite | +0.140 |
Neg logit contributions | |
keeping | -0.186 |
outage | -0.174 |
leaf | -0.170 |
fright | -0.166 |
harm | -0.166 |
Token | stirring |
Token ID | 26547 |
Feature activation | -6.89e-02 |
Pos logit contributions | |
keeping | +0.003 |
leaf | +0.003 |
harm | +0.003 |
Judy | +0.003 |
outage | +0.003 |
Neg logit contributions | |
uddle | -0.003 |
nicely | -0.003 |
aws | -0.003 |
favorite | -0.003 |
fair | -0.003 |
Token | the |
Token ID | 262 |
Feature activation | -2.67e-02 |
Pos logit contributions | |
nicely | +0.068 |
fair | +0.066 |
uddle | +0.065 |
aws | +0.063 |
favorite | +0.061 |
Neg logit contributions | |
keeping | -0.156 |
outage | -0.147 |
leaf | -0.146 |
fright | -0.144 |
harm | -0.144 |
Token | pot |
Token ID | 1787 |
Feature activation | -2.71e-02 |
Pos logit contributions | |
nicely | +0.031 |
fair | +0.031 |
uddle | +0.031 |
aws | +0.030 |
favorite | +0.029 |
Neg logit contributions | |
keeping | -0.056 |
outage | -0.053 |
leaf | -0.052 |
fright | -0.052 |
fly | -0.051 |
Token | , |
Token ID | 11 |
Feature activation | -1.57e-01 |
Pos logit contributions | |
nicely | +0.023 |
fair | +0.022 |
uddle | +0.022 |
aws | +0.021 |
favorite | +0.020 |
Neg logit contributions | |
keeping | -0.065 |
outage | -0.062 |
leaf | -0.061 |
fright | -0.060 |
fly | -0.060 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.84e-02 |
Pos logit contributions | |
nicely | +0.139 |
fair | +0.134 |
favorite | +0.117 |
gym | +0.110 |
aws | +0.110 |
Neg logit contributions | |
keeping | -0.392 |
outage | -0.384 |
leaf | -0.373 |
rum | -0.368 |
fly | -0.364 |
Token | accidentally |
Token ID | 14716 |
Feature activation | +8.39e-04 |
Pos logit contributions | |
keeping | +0.053 |
leaf | +0.050 |
outage | +0.050 |
fright | +0.048 |
harm | +0.048 |
Neg logit contributions | |
nicely | -0.040 |
fair | -0.040 |
uddle | -0.039 |
aws | -0.038 |
favorite | -0.037 |
Token | spilled |
Token ID | 31896 |
Feature activation | -6.58e-02 |
Pos logit contributions | |
keeping | +0.002 |
outage | +0.002 |
leaf | +0.002 |
icated | +0.002 |
eway | +0.002 |
Neg logit contributions | |
nicely | -0.001 |
fair | -0.001 |
p | -0.001 |
cart | -0.001 |
nice | -0.001 |
Token | some |
Token ID | 617 |
Feature activation | -6.82e-03 |
Pos logit contributions | |
nicely | +0.078 |
fair | +0.078 |
aws | +0.072 |
uddle | +0.070 |
cart | +0.070 |
Neg logit contributions | |
keeping | -0.138 |
outage | -0.134 |
leaf | -0.132 |
jo | -0.129 |
fly | -0.129 |
Token | soup |
Token ID | 17141 |
Feature activation | -6.98e-02 |
Pos logit contributions | |
fair | +0.008 |
nicely | +0.008 |
uddle | +0.008 |
nice | +0.008 |
favorite | +0.008 |
Neg logit contributions | |
keeping | -0.014 |
outage | -0.013 |
fly | -0.013 |
Judy | -0.013 |
jo | -0.013 |
Token | his |
Token ID | 465 |
Feature activation | -6.55e-02 |
Pos logit contributions | |
fair | +0.009 |
nicely | +0.009 |
favorite | +0.008 |
hockey | +0.008 |
nice | +0.008 |
Neg logit contributions | |
keeping | -0.013 |
leaf | -0.013 |
outage | -0.012 |
fly | -0.012 |
jo | -0.012 |
Token | home |
Token ID | 1363 |
Feature activation | -5.35e-02 |
Pos logit contributions | |
favorite | +0.087 |
fair | +0.086 |
nicely | +0.085 |
uddle | +0.080 |
nice | +0.078 |
Neg logit contributions | |
keeping | -0.136 |
leaf | -0.134 |
outage | -0.130 |
fly | -0.127 |
Judy | -0.127 |
Token | was |
Token ID | 373 |
Feature activation | +2.54e-03 |
Pos logit contributions | |
nicely | +0.077 |
fair | +0.072 |
favorite | +0.071 |
uddle | +0.070 |
nice | +0.065 |
Neg logit contributions | |
keeping | -0.102 |
leaf | -0.099 |
Judy | -0.096 |
fright | -0.095 |
icated | -0.094 |
Token | special |
Token ID | 2041 |
Feature activation | -3.38e-02 |
Pos logit contributions | |
keeping | +0.006 |
outage | +0.005 |
leaf | +0.005 |
fly | +0.005 |
fright | +0.005 |
Neg logit contributions | |
nicely | -0.003 |
fair | -0.003 |
uddle | -0.002 |
aws | -0.002 |
favorite | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +3.57e-02 |
Pos logit contributions | |
nicely | +0.032 |
fair | +0.032 |
favorite | +0.029 |
uddle | +0.028 |
nice | +0.027 |
Neg logit contributions | |
keeping | -0.080 |
leaf | -0.076 |
icated | -0.074 |
fright | -0.074 |
outage | -0.074 |
Token | he |
Token ID | 339 |
Feature activation | -1.16e-01 |
Pos logit contributions | |
keeping | +0.079 |
outage | +0.075 |
leaf | +0.074 |
fly | +0.073 |
fright | +0.072 |
Neg logit contributions | |
fair | -0.041 |
nicely | -0.040 |
favorite | -0.037 |
nice | -0.036 |
uddle | -0.033 |
Token | no |
Token ID | 645 |
Feature activation | +5.10e-02 |
Pos logit contributions | |
fair | +0.143 |
favorite | +0.143 |
nicely | +0.142 |
best | +0.130 |
nice | +0.126 |
Neg logit contributions | |
keeping | -0.257 |
eway | -0.237 |
outage | -0.234 |
leaf | -0.229 |
Judy | -0.227 |
Token | longer |
Token ID | 2392 |
Feature activation | -1.19e-02 |
Pos logit contributions | |
keeping | +0.116 |
outage | +0.109 |
leaf | +0.109 |
fright | +0.106 |
fly | +0.106 |
Neg logit contributions | |
nicely | -0.052 |
fair | -0.051 |
uddle | -0.049 |
aws | -0.048 |
favorite | -0.047 |
Token | felt |
Token ID | 2936 |
Feature activation | -2.23e-02 |
Pos logit contributions | |
fair | +0.017 |
nicely | +0.016 |
nice | +0.015 |
favorite | +0.014 |
cooler | +0.013 |
Neg logit contributions | |
keeping | -0.023 |
leaf | -0.022 |
eway | -0.022 |
outage | -0.021 |
fly | -0.021 |
Token | the |
Token ID | 262 |
Feature activation | -2.81e-02 |
Pos logit contributions | |
nicely | +0.024 |
fair | +0.023 |
uddle | +0.022 |
aws | +0.022 |
favorite | +0.021 |
Neg logit contributions | |
keeping | -0.049 |
outage | -0.046 |
leaf | -0.046 |
fly | -0.045 |
fright | -0.045 |
Token | need |
Token ID | 761 |
Feature activation | +4.75e-02 |
Pos logit contributions | |
fair | +0.035 |
nicely | +0.033 |
nice | +0.031 |
favorite | +0.031 |
cooler | +0.030 |
Neg logit contributions | |
keeping | -0.060 |
outage | -0.057 |
leaf | -0.057 |
Judy | -0.056 |
jo | -0.056 |