Token | Among |
Token ID | 9754 |
Feature activation | -5.00e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.016 |
orate | -0.012 |
sex | -0.012 |
iculture | -0.012 |
antly | -0.012 |
Token | Thieves |
Token ID | 40392 |
Feature activation | +4.08e-02 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.029 |
sex | +0.029 |
iculture | +0.028 |
antly | +0.028 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.033 |
タ | -0.031 |
\": | -0.031 |
LM | -0.029 |
Token | is |
Token ID | 318 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
oot | +0.029 |
MFT | +0.027 |
タ | +0.026 |
\": | +0.025 |
LM | +0.024 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.023 |
sex | -0.023 |
antly | -0.023 |
iculture | -0.023 |
Token | a |
Token ID | 257 |
Feature activation | +3.64e-02 |
Pos logit contributions | |
oot | +0.030 |
MFT | +0.029 |
タ | +0.027 |
\": | +0.027 |
LM | +0.026 |
Neg logit contributions | |
weet | -0.030 |
orate | -0.022 |
sex | -0.022 |
antly | -0.022 |
iculture | -0.022 |
Token | h |
Token ID | 289 |
Feature activation | +3.10e-02 |
Pos logit contributions | |
oot | +0.027 |
MFT | +0.026 |
タ | +0.024 |
\": | +0.024 |
LM | +0.023 |
Neg logit contributions | |
weet | -0.026 |
orate | -0.019 |
sex | -0.019 |
antly | -0.019 |
iculture | -0.019 |
Token | oot |
Token ID | 1025 |
Feature activation | +5.42e-01 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.015 |
タ | +0.014 |
\": | +0.014 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.029 |
orate | -0.024 |
sex | -0.023 |
antly | -0.023 |
iculture | -0.023 |
Token | from |
Token ID | 422 |
Feature activation | +6.24e-02 |
Pos logit contributions | |
oot | +0.394 |
MFT | +0.375 |
タ | +0.353 |
\": | +0.350 |
LM | +0.337 |
Neg logit contributions | |
weet | -0.395 |
orate | -0.297 |
sex | -0.292 |
antly | -0.288 |
iculture | -0.288 |
Token | bold |
Token ID | 10758 |
Feature activation | -1.36e-01 |
Pos logit contributions | |
oot | +0.043 |
MFT | +0.041 |
タ | +0.039 |
\": | +0.038 |
LM | +0.037 |
Neg logit contributions | |
weet | -0.048 |
orate | -0.036 |
sex | -0.036 |
antly | -0.035 |
iculture | -0.035 |
Token | beginning |
Token ID | 3726 |
Feature activation | +8.84e-02 |
Pos logit contributions | |
weet | +0.101 |
orate | +0.076 |
sex | +0.075 |
antly | +0.074 |
iculture | +0.074 |
Neg logit contributions | |
oot | -0.097 |
MFT | -0.092 |
タ | -0.087 |
\": | -0.086 |
LM | -0.082 |
Token | right |
Token ID | 826 |
Feature activation | +6.78e-02 |
Pos logit contributions | |
oot | +0.069 |
MFT | +0.066 |
タ | +0.062 |
\": | +0.062 |
LM | +0.060 |
Neg logit contributions | |
weet | -0.060 |
orate | -0.044 |
sex | -0.043 |
iculture | -0.042 |
antly | -0.042 |
Token | through |
Token ID | 832 |
Feature activation | -5.76e-02 |
Pos logit contributions | |
oot | +0.051 |
MFT | +0.048 |
タ | +0.046 |
\": | +0.045 |
LM | +0.044 |
Neg logit contributions | |
weet | -0.048 |
orate | -0.036 |
sex | -0.035 |
antly | -0.035 |
iculture | -0.035 |
Token | the |
Token ID | 262 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.013 |
タ | +0.012 |
\": | +0.012 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.014 |
orate | -0.011 |
sex | -0.011 |
iculture | -0.010 |
antly | -0.010 |
Token | mess |
Token ID | 2085 |
Feature activation | +6.60e-02 |
Pos logit contributions | |
oot | +0.011 |
MFT | +0.011 |
タ | +0.010 |
\": | +0.010 |
LM | +0.009 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.009 |
sex | -0.009 |
antly | -0.008 |
iculture | -0.008 |
Token | of |
Token ID | 286 |
Feature activation | +1.09e-02 |
Pos logit contributions | |
oot | +0.050 |
MFT | +0.048 |
タ | +0.046 |
\": | +0.045 |
LM | +0.044 |
Neg logit contributions | |
weet | -0.045 |
orate | -0.034 |
sex | -0.033 |
antly | -0.033 |
iculture | -0.033 |
Token | the |
Token ID | 262 |
Feature activation | +5.10e-04 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.008 |
タ | +0.007 |
\": | +0.007 |
LM | +0.007 |
Neg logit contributions | |
weet | -0.008 |
orate | -0.006 |
sex | -0.006 |
iculture | -0.006 |
antly | -0.006 |
Token | inco |
Token ID | 44237 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
oot | +0.000 |
MFT | +0.000 |
タ | +0.000 |
\": | +0.000 |
LM | +0.000 |
Neg logit contributions | |
weet | -0.000 |
orate | -0.000 |
sex | -0.000 |
iculture | -0.000 |
antly | -0.000 |
Token | herent |
Token ID | 8334 |
Feature activation | +5.37e-01 |
Pos logit contributions | |
oot | +0.032 |
MFT | +0.028 |
タ | +0.024 |
\": | +0.023 |
LM | +0.020 |
Neg logit contributions | |
weet | -0.128 |
orate | -0.109 |
sex | -0.108 |
antly | -0.107 |
iculture | -0.107 |
Token | politics |
Token ID | 4819 |
Feature activation | +2.21e-02 |
Pos logit contributions | |
oot | +0.388 |
MFT | +0.369 |
タ | +0.348 |
\": | +0.345 |
LM | +0.333 |
Neg logit contributions | |
weet | -0.392 |
orate | -0.295 |
sex | -0.291 |
iculture | -0.287 |
antly | -0.286 |
Token | that |
Token ID | 326 |
Feature activation | -8.85e-03 |
Pos logit contributions | |
oot | +0.016 |
MFT | +0.015 |
タ | +0.014 |
\": | +0.014 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.016 |
orate | -0.012 |
sex | -0.012 |
antly | -0.012 |
iculture | -0.012 |
Token | is |
Token ID | 318 |
Feature activation | +3.75e-02 |
Pos logit contributions | |
weet | +0.007 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.006 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | has |
Token ID | 468 |
Feature activation | +1.72e-02 |
Pos logit contributions | |
oot | +0.027 |
MFT | +0.026 |
タ | +0.024 |
\": | +0.024 |
LM | +0.023 |
Neg logit contributions | |
weet | -0.027 |
orate | -0.021 |
sex | -0.020 |
iculture | -0.020 |
antly | -0.020 |
Token | to |
Token ID | 284 |
Feature activation | -2.37e-02 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
タ | +0.010 |
\": | +0.010 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.010 |
sex | -0.010 |
antly | -0.010 |
iculture | -0.010 |
Token | ling |
Token ID | 1359 |
Feature activation | -6.85e-02 |
Pos logit contributions | |
oot | +0.002 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | +3.45e-02 |
Pos logit contributions | |
weet | +0.050 |
orate | +0.037 |
sex | +0.037 |
antly | +0.036 |
iculture | +0.036 |
Neg logit contributions | |
oot | -0.050 |
MFT | -0.048 |
タ | -0.045 |
\": | -0.045 |
LM | -0.043 |
Token |
|
Token ID | 198 |
Feature activation | +2.44e-02 |
Pos logit contributions | |
oot | +0.026 |
MFT | +0.025 |
タ | +0.023 |
\": | +0.023 |
LM | +0.022 |
Neg logit contributions | |
weet | -0.024 |
orate | -0.018 |
sex | -0.018 |
antly | -0.017 |
iculture | -0.017 |
Token | Mind |
Token ID | 28478 |
Feature activation | -3.08e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.015 |
Neg logit contributions | |
weet | -0.018 |
orate | -0.014 |
sex | -0.014 |
antly | -0.014 |
iculture | -0.014 |
Token | claw |
Token ID | 43143 |
Feature activation | -2.10e-01 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.017 |
sex | +0.016 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.020 |
\": | -0.020 |
LM | -0.019 |
Token | Shaman |
Token ID | 34805 |
Feature activation | +5.05e-01 |
Pos logit contributions | |
weet | +0.154 |
orate | +0.116 |
sex | +0.114 |
antly | +0.113 |
Rak | +0.112 |
Neg logit contributions | |
oot | -0.152 |
MFT | -0.145 |
タ | -0.137 |
\": | -0.135 |
LM | -0.129 |
Token | vs |
Token ID | 3691 |
Feature activation | -8.08e-02 |
Pos logit contributions | |
oot | +0.392 |
MFT | +0.375 |
タ | +0.354 |
\": | +0.352 |
LM | +0.340 |
Neg logit contributions | |
weet | -0.342 |
orate | -0.251 |
sex | -0.246 |
iculture | -0.243 |
antly | -0.243 |
Token | Pyro |
Token ID | 44954 |
Feature activation | +1.44e-01 |
Pos logit contributions | |
weet | +0.058 |
orate | +0.043 |
sex | +0.043 |
antly | +0.042 |
iculture | +0.042 |
Neg logit contributions | |
oot | -0.060 |
MFT | -0.057 |
タ | -0.054 |
\": | -0.053 |
LM | -0.051 |
Token | blast |
Token ID | 39806 |
Feature activation | -1.55e-01 |
Pos logit contributions | |
oot | +0.114 |
MFT | +0.109 |
タ | +0.103 |
\": | +0.103 |
LM | +0.099 |
Neg logit contributions | |
weet | -0.096 |
orate | -0.070 |
sex | -0.069 |
antly | -0.068 |
iculture | -0.067 |
Token |
|
Token ID | 198 |
Feature activation | +4.54e-02 |
Pos logit contributions | |
weet | +0.111 |
orate | +0.083 |
sex | +0.082 |
antly | +0.081 |
iculture | +0.081 |
Neg logit contributions | |
oot | -0.114 |
MFT | -0.109 |
タ | -0.102 |
\": | -0.101 |
LM | -0.097 |
Token |
|
Token ID | 198 |
Feature activation | +3.24e-02 |
Pos logit contributions | |
oot | +0.034 |
MFT | +0.033 |
タ | +0.031 |
\": | +0.030 |
LM | +0.029 |
Neg logit contributions | |
weet | -0.032 |
orate | -0.024 |
sex | -0.023 |
antly | -0.023 |
iculture | -0.023 |
Token | of |
Token ID | 286 |
Feature activation | +6.92e-03 |
Pos logit contributions | |
weet | +0.007 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.007 |
MFT | -0.006 |
タ | -0.006 |
\": | -0.006 |
LM | -0.006 |
Token | Murray |
Token ID | 12164 |
Feature activation | +2.42e-01 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.007 |
orate | -0.006 |
sex | -0.006 |
redistributed | -0.006 |
iculture | -0.006 |
Token | N |
Token ID | 399 |
Feature activation | +4.19e-02 |
Pos logit contributions | |
oot | +0.179 |
MFT | +0.170 |
\": | +0.165 |
タ | +0.163 |
LM | +0.155 |
Neg logit contributions | |
weet | -0.169 |
sex | -0.128 |
Rak | -0.127 |
Rape | -0.124 |
orate | -0.123 |
Token | . |
Token ID | 13 |
Feature activation | +3.75e-03 |
Pos logit contributions | |
oot | +0.030 |
MFT | +0.029 |
タ | +0.027 |
\": | +0.027 |
LM | +0.026 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.023 |
sex | -0.023 |
antly | -0.022 |
iculture | -0.022 |
Token | Roth |
Token ID | 16131 |
Feature activation | +5.67e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
iculture | -0.002 |
antly | -0.002 |
Token | bard |
Token ID | 23024 |
Feature activation | +5.04e-01 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
\": | +0.009 |
タ | +0.008 |
LM | +0.007 |
Neg logit contributions | |
weet | -0.069 |
sex | -0.059 |
orate | -0.058 |
iculture | -0.058 |
antly | -0.058 |
Token | ( |
Token ID | 357 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
oot | +0.384 |
MFT | +0.366 |
\": | +0.353 |
タ | +0.352 |
�� | +0.333 |
Neg logit contributions | |
weet | -0.344 |
sex | -0.251 |
orate | -0.247 |
antly | -0.246 |
Rak | -0.244 |
Token | Prom |
Token ID | 24129 |
Feature activation | +7.84e-02 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
\": | +0.006 |
タ | +0.006 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.008 |
sex | -0.006 |
antly | -0.006 |
orate | -0.006 |
iculture | -0.006 |
Token | etheus |
Token ID | 36916 |
Feature activation | -3.64e-02 |
Pos logit contributions | |
oot | +0.068 |
MFT | +0.066 |
タ | +0.063 |
\": | +0.062 |
LM | +0.060 |
Neg logit contributions | |
weet | -0.046 |
orate | -0.032 |
antly | -0.031 |
sex | -0.031 |
iculture | -0.031 |
Token | Books |
Token ID | 13661 |
Feature activation | +1.48e-01 |
Pos logit contributions | |
weet | +0.027 |
sex | +0.021 |
antly | +0.020 |
orate | +0.020 |
Rak | +0.020 |
Neg logit contributions | |
oot | -0.026 |
MFT | -0.024 |
\": | -0.023 |
タ | -0.023 |
LM | -0.022 |
Token | , |
Token ID | 11 |
Feature activation | +6.49e-02 |
Pos logit contributions | |
oot | +0.114 |
MFT | +0.110 |
タ | +0.104 |
\": | +0.103 |
LM | +0.099 |
Neg logit contributions | |
weet | -0.101 |
orate | -0.074 |
sex | -0.073 |
Rak | -0.072 |
antly | -0.072 |
Token | era |
Token ID | 8607 |
Feature activation | +2.10e-01 |
Pos logit contributions | |
weet | +0.007 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.004 |
タ | -0.003 |
\": | -0.003 |
LM | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | +1.94e-02 |
Pos logit contributions | |
oot | +0.157 |
MFT | +0.149 |
タ | +0.141 |
\": | +0.140 |
LM | +0.134 |
Neg logit contributions | |
weet | -0.148 |
orate | -0.110 |
sex | -0.109 |
antly | -0.107 |
iculture | -0.107 |
Token | the |
Token ID | 262 |
Feature activation | +2.80e-02 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.014 |
タ | +0.013 |
\": | +0.013 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.014 |
orate | -0.010 |
sex | -0.010 |
antly | -0.010 |
iculture | -0.010 |
Token | Human |
Token ID | 5524 |
Feature activation | +1.75e-01 |
Pos logit contributions | |
oot | +0.020 |
MFT | +0.019 |
タ | +0.018 |
\": | +0.018 |
LM | +0.017 |
Neg logit contributions | |
weet | -0.021 |
orate | -0.016 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.015 |
Token | Gen |
Token ID | 5215 |
Feature activation | -8.16e-03 |
Pos logit contributions | |
oot | +0.134 |
MFT | +0.128 |
タ | +0.121 |
\": | +0.120 |
LM | +0.115 |
Neg logit contributions | |
weet | -0.121 |
orate | -0.090 |
sex | -0.088 |
iculture | -0.087 |
antly | -0.087 |
Token | ome |
Token ID | 462 |
Feature activation | +4.88e-01 |
Pos logit contributions | |
weet | +0.010 |
orate | +0.008 |
sex | +0.008 |
iculture | +0.008 |
antly | +0.008 |
Neg logit contributions | |
oot | -0.002 |
MFT | -0.002 |
タ | -0.002 |
\": | -0.002 |
LM | -0.002 |
Token | Project |
Token ID | 4935 |
Feature activation | -2.13e-02 |
Pos logit contributions | |
oot | +0.365 |
MFT | +0.346 |
タ | +0.330 |
\": | +0.326 |
heim | +0.314 |
Neg logit contributions | |
weet | -0.344 |
orate | -0.257 |
sex | -0.254 |
antly | -0.250 |
redistributed | -0.248 |
Token | both |
Token ID | 1111 |
Feature activation | -1.85e-01 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
antly | +0.012 |
iculture | +0.012 |
Neg logit contributions | |
oot | -0.015 |
MFT | -0.014 |
タ | -0.014 |
\": | -0.013 |
LM | -0.013 |
Token | complete |
Token ID | 1844 |
Feature activation | -2.22e-02 |
Pos logit contributions | |
weet | +0.139 |
orate | +0.106 |
sex | +0.104 |
antly | +0.103 |
redistributed | +0.103 |
Neg logit contributions | |
oot | -0.130 |
MFT | -0.123 |
タ | -0.117 |
\": | -0.115 |
LM | -0.111 |
Token | 99 |
Token ID | 7388 |
Feature activation | -4.68e-02 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
antly | +0.012 |
iculture | +0.011 |
Neg logit contributions | |
oot | -0.016 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.014 |
Token | % |
Token ID | 4 |
Feature activation | +5.47e-02 |
Pos logit contributions | |
weet | +0.037 |
orate | +0.028 |
sex | +0.028 |
antly | +0.028 |
iculture | +0.027 |
Neg logit contributions | |
oot | -0.031 |
MFT | -0.030 |
タ | -0.028 |
\": | -0.028 |
LM | -0.026 |
Token | N |
Token ID | 399 |
Feature activation | +4.39e-02 |
Pos logit contributions | |
weet | +0.027 |
orate | +0.020 |
sex | +0.020 |
antly | +0.020 |
redistributed | +0.020 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.021 |
\": | -0.020 |
heim | -0.020 |
Token | iss |
Token ID | 747 |
Feature activation | +1.15e-01 |
Pos logit contributions | |
oot | +0.029 |
MFT | +0.028 |
タ | +0.027 |
\": | +0.026 |
LM | +0.025 |
Neg logit contributions | |
weet | -0.034 |
orate | -0.026 |
sex | -0.026 |
antly | -0.026 |
iculture | -0.025 |
Token | ans |
Token ID | 504 |
Feature activation | +4.03e-02 |
Pos logit contributions | |
oot | +0.074 |
MFT | +0.070 |
タ | +0.065 |
\": | +0.064 |
LM | +0.062 |
Neg logit contributions | |
weet | -0.094 |
orate | -0.073 |
sex | -0.072 |
antly | -0.071 |
iculture | -0.071 |
Token | of |
Token ID | 286 |
Feature activation | +2.00e-02 |
Pos logit contributions | |
oot | +0.032 |
MFT | +0.030 |
タ | +0.029 |
\": | +0.029 |
LM | +0.028 |
Neg logit contributions | |
weet | -0.027 |
orate | -0.020 |
sex | -0.019 |
antly | -0.019 |
iculture | -0.019 |
Token | R |
Token ID | 371 |
Feature activation | +7.71e-02 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.014 |
タ | +0.013 |
\": | +0.013 |
heim | +0.012 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.011 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | LR |
Token ID | 35972 |
Feature activation | +4.83e-01 |
Pos logit contributions | |
oot | +0.050 |
MFT | +0.048 |
タ | +0.045 |
\": | +0.044 |
�� | +0.042 |
Neg logit contributions | |
weet | -0.062 |
orate | -0.048 |
sex | -0.047 |
antly | -0.047 |
iculture | -0.047 |
Token | MS |
Token ID | 6579 |
Feature activation | +1.59e-01 |
Pos logit contributions | |
oot | +0.299 |
MFT | +0.284 |
タ | +0.264 |
\": | +0.262 |
LM | +0.248 |
Neg logit contributions | |
weet | -0.402 |
orate | -0.314 |
sex | -0.311 |
antly | -0.307 |
iculture | -0.307 |
Token | port |
Token ID | 634 |
Feature activation | +9.88e-02 |
Pos logit contributions | |
oot | +0.110 |
MFT | +0.104 |
タ | +0.098 |
\": | +0.097 |
heim | +0.093 |
Neg logit contributions | |
weet | -0.121 |
orate | -0.092 |
sex | -0.091 |
antly | -0.090 |
iculture | -0.090 |
Token | and |
Token ID | 290 |
Feature activation | +3.43e-02 |
Pos logit contributions | |
oot | +0.073 |
MFT | +0.070 |
タ | +0.066 |
\": | +0.065 |
LM | +0.062 |
Neg logit contributions | |
weet | -0.071 |
orate | -0.053 |
sex | -0.052 |
antly | -0.051 |
iculture | -0.051 |
Token | 360 |
Token ID | 11470 |
Feature activation | -8.94e-02 |
Pos logit contributions | |
oot | +0.024 |
MFT | +0.023 |
タ | +0.022 |
\": | +0.022 |
heim | +0.021 |
Neg logit contributions | |
weet | -0.026 |
orate | -0.019 |
sex | -0.019 |
antly | -0.019 |
iculture | -0.019 |
Token | Racing |
Token ID | 14954 |
Feature activation | +2.32e-01 |
Pos logit contributions | |
weet | +0.069 |
orate | +0.052 |
sex | +0.052 |
antly | +0.051 |
iculture | +0.051 |
Neg logit contributions | |
oot | -0.061 |
MFT | -0.058 |
タ | -0.055 |
\": | -0.054 |
heim | -0.052 |
Token | then |
Token ID | 788 |
Feature activation | -1.81e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.013 |
antly | -0.013 |
iculture | -0.013 |
Token | teenager |
Token ID | 15287 |
Feature activation | +3.02e-02 |
Pos logit contributions | |
weet | +0.013 |
orate | +0.010 |
sex | +0.010 |
antly | +0.010 |
iculture | +0.010 |
Neg logit contributions | |
oot | -0.013 |
MFT | -0.012 |
タ | -0.012 |
\": | -0.012 |
LM | -0.011 |
Token | to |
Token ID | 284 |
Feature activation | +2.82e-02 |
Pos logit contributions | |
oot | +0.022 |
MFT | +0.021 |
タ | +0.020 |
\": | +0.020 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Token | the |
Token ID | 262 |
Feature activation | +1.67e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.018 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.020 |
orate | -0.015 |
sex | -0.015 |
iculture | -0.015 |
antly | -0.015 |
Token | 2009 |
Token ID | 3717 |
Feature activation | +8.29e-03 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
タ | +0.011 |
\": | +0.010 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.012 |
orate | -0.009 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | Confeder |
Token ID | 16986 |
Feature activation | +4.79e-01 |
Pos logit contributions | |
oot | +0.006 |
MFT | +0.005 |
タ | +0.005 |
\": | +0.005 |
LM | +0.005 |
Neg logit contributions | |
weet | -0.006 |
orate | -0.005 |
sex | -0.005 |
antly | -0.005 |
iculture | -0.005 |
Token | ations |
Token ID | 602 |
Feature activation | +1.36e-01 |
Pos logit contributions | |
oot | +0.310 |
MFT | +0.293 |
タ | +0.274 |
\": | +0.270 |
LM | +0.259 |
Neg logit contributions | |
weet | -0.386 |
orate | -0.300 |
sex | -0.295 |
redistributed | -0.293 |
iculture | -0.292 |
Token | Cup |
Token ID | 5454 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
oot | +0.085 |
MFT | +0.081 |
タ | +0.075 |
\": | +0.074 |
LM | +0.070 |
Neg logit contributions | |
weet | -0.113 |
orate | -0.087 |
sex | -0.087 |
antly | -0.086 |
iculture | -0.085 |
Token | and |
Token ID | 290 |
Feature activation | +9.78e-03 |
Pos logit contributions | |
oot | +0.079 |
MFT | +0.075 |
タ | +0.071 |
\": | +0.070 |
LM | +0.067 |
Neg logit contributions | |
weet | -0.081 |
orate | -0.061 |
sex | -0.060 |
antly | -0.060 |
iculture | -0.059 |
Token | that |
Token ID | 326 |
Feature activation | -2.01e-02 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
タ | +0.006 |
\": | +0.006 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.007 |
orate | -0.005 |
sex | -0.005 |
iculture | -0.005 |
antly | -0.005 |
Token | experience |
Token ID | 1998 |
Feature activation | -2.02e-02 |
Pos logit contributions | |
weet | +0.014 |
orate | +0.011 |
sex | +0.010 |
antly | +0.010 |
iculture | +0.010 |
Neg logit contributions | |
oot | -0.015 |
MFT | -0.014 |
タ | -0.013 |
\": | -0.013 |
LM | -0.013 |
Token | p |
Token ID | 279 |
Feature activation | +7.74e-02 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.007 |
LM | +0.007 |
Neg logit contributions | |
weet | -0.008 |
orate | -0.006 |
sex | -0.006 |
antly | -0.006 |
iculture | -0.006 |
Token | ère |
Token ID | 35979 |
Feature activation | -1.25e-01 |
Pos logit contributions | |
oot | +0.059 |
MFT | +0.057 |
タ | +0.054 |
\": | +0.053 |
LM | +0.051 |
Neg logit contributions | |
weet | -0.053 |
orate | -0.040 |
sex | -0.039 |
antly | -0.038 |
iculture | -0.038 |
Token | , |
Token ID | 11 |
Feature activation | +5.44e-02 |
Pos logit contributions | |
weet | +0.093 |
orate | +0.071 |
sex | +0.070 |
antly | +0.069 |
iculture | +0.069 |
Neg logit contributions | |
oot | -0.088 |
MFT | -0.084 |
タ | -0.079 |
\": | -0.078 |
LM | -0.075 |
Token | cr |
Token ID | 1067 |
Feature activation | -9.84e-02 |
Pos logit contributions | |
oot | +0.038 |
MFT | +0.036 |
タ | +0.034 |
\": | +0.034 |
LM | +0.033 |
Neg logit contributions | |
weet | -0.041 |
orate | -0.031 |
sex | -0.031 |
antly | -0.030 |
iculture | -0.030 |
Token | é |
Token ID | 2634 |
Feature activation | +8.28e-03 |
Pos logit contributions | |
weet | +0.070 |
orate | +0.053 |
sex | +0.052 |
antly | +0.051 |
iculture | +0.051 |
Neg logit contributions | |
oot | -0.073 |
MFT | -0.070 |
タ | -0.066 |
\": | -0.065 |
LM | -0.063 |
Token | ateur |
Token ID | 15093 |
Feature activation | +4.68e-01 |
Pos logit contributions | |
oot | +0.005 |
MFT | +0.005 |
タ | +0.004 |
\": | +0.004 |
LM | +0.004 |
Neg logit contributions | |
weet | -0.007 |
orate | -0.006 |
sex | -0.006 |
iculture | -0.005 |
antly | -0.005 |
Token | origin |
Token ID | 8159 |
Feature activation | +6.17e-02 |
Pos logit contributions | |
oot | +0.330 |
MFT | +0.314 |
タ | +0.296 |
\": | +0.293 |
LM | +0.281 |
Neg logit contributions | |
weet | -0.350 |
orate | -0.266 |
sex | -0.262 |
antly | -0.259 |
iculture | -0.258 |
Token | el |
Token ID | 417 |
Feature activation | +6.51e-02 |
Pos logit contributions | |
oot | +0.043 |
MFT | +0.041 |
タ | +0.039 |
\": | +0.038 |
LM | +0.037 |
Neg logit contributions | |
weet | -0.047 |
orate | -0.036 |
sex | -0.035 |
antly | -0.035 |
iculture | -0.035 |
Token | du |
Token ID | 7043 |
Feature activation | -3.54e-02 |
Pos logit contributions | |
oot | +0.045 |
MFT | +0.042 |
タ | +0.040 |
\": | +0.040 |
LM | +0.038 |
Neg logit contributions | |
weet | -0.050 |
orate | -0.038 |
sex | -0.038 |
antly | -0.037 |
iculture | -0.037 |
Token | programme |
Token ID | 11383 |
Feature activation | -1.15e-01 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.020 |
sex | +0.020 |
iculture | +0.019 |
antly | +0.019 |
Neg logit contributions | |
oot | -0.025 |
MFT | -0.024 |
タ | -0.023 |
\": | -0.022 |
LM | -0.021 |
Token | C |
Token ID | 327 |
Feature activation | +1.98e-02 |
Pos logit contributions | |
weet | +0.086 |
orate | +0.066 |
sex | +0.065 |
antly | +0.064 |
iculture | +0.064 |
Neg logit contributions | |
oot | -0.081 |
MFT | -0.077 |
タ | -0.073 |
\": | -0.072 |
LM | -0.069 |
Token | looks |
Token ID | 3073 |
Feature activation | +9.44e-04 |
Pos logit contributions | |
weet | +0.014 |
orate | +0.011 |
sex | +0.011 |
antly | +0.011 |
redistributed | +0.010 |
Neg logit contributions | |
oot | -0.013 |
MFT | -0.012 |
タ | -0.011 |
\": | -0.011 |
LM | -0.011 |
Token | well |
Token ID | 880 |
Feature activation | +1.62e-01 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.001 |
orate | -0.000 |
sex | -0.000 |
antly | -0.000 |
iculture | -0.000 |
Token | n |
Token ID | 299 |
Feature activation | -5.59e-02 |
Pos logit contributions | |
oot | +0.120 |
MFT | +0.114 |
タ | +0.108 |
\": | +0.107 |
LM | +0.103 |
Neg logit contributions | |
weet | -0.116 |
orate | -0.087 |
sex | -0.085 |
iculture | -0.084 |
antly | -0.084 |
Token | igh |
Token ID | 394 |
Feature activation | +4.12e-03 |
Pos logit contributions | |
weet | +0.039 |
orate | +0.028 |
sex | +0.028 |
antly | +0.028 |
iculture | +0.028 |
Neg logit contributions | |
oot | -0.043 |
MFT | -0.041 |
タ | -0.039 |
\": | -0.038 |
LM | -0.037 |
Token | insol |
Token ID | 35831 |
Feature activation | -9.61e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.003 |
LM | +0.003 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token | uble |
Token ID | 26664 |
Feature activation | +4.57e-01 |
Pos logit contributions | |
weet | +0.111 |
sex | +0.093 |
orate | +0.093 |
antly | +0.093 |
iculture | +0.090 |
Neg logit contributions | |
oot | -0.027 |
MFT | -0.026 |
\": | -0.022 |
タ | -0.021 |
LM | -0.020 |
Token | , |
Token ID | 11 |
Feature activation | -1.69e-02 |
Pos logit contributions | |
oot | +0.332 |
MFT | +0.317 |
タ | +0.299 |
\": | +0.296 |
LM | +0.285 |
Neg logit contributions | |
weet | -0.331 |
orate | -0.249 |
sex | -0.245 |
antly | -0.243 |
iculture | -0.241 |
Token | given |
Token ID | 1813 |
Feature activation | +1.47e-02 |
Pos logit contributions | |
weet | +0.012 |
orate | +0.009 |
sex | +0.009 |
antly | +0.009 |
iculture | +0.009 |
Neg logit contributions | |
oot | -0.012 |
MFT | -0.012 |
タ | -0.011 |
\": | -0.011 |
LM | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | -4.30e-02 |
Pos logit contributions | |
oot | +0.010 |
MFT | +0.010 |
タ | +0.009 |
\": | +0.009 |
LM | +0.009 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.008 |
sex | -0.008 |
antly | -0.008 |
iculture | -0.008 |
Token |
|
Token ID | 198 |
Feature activation | -3.92e-02 |
Pos logit contributions | |
weet | +0.030 |
orate | +0.022 |
sex | +0.022 |
antly | +0.022 |
iculture | +0.021 |
Neg logit contributions | |
oot | -0.033 |
MFT | -0.031 |
タ | -0.029 |
\": | -0.029 |
LM | -0.028 |
Token | that |
Token ID | 5562 |
Feature activation | -4.25e-02 |
Pos logit contributions | |
weet | +0.033 |
orate | +0.026 |
sex | +0.026 |
antly | +0.025 |
iculture | +0.025 |
Neg logit contributions | |
oot | -0.024 |
MFT | -0.023 |
タ | -0.021 |
\": | -0.021 |
LM | -0.020 |
Token | energy |
Token ID | 2568 |
Feature activation | -5.13e-02 |
Pos logit contributions | |
weet | +0.063 |
orate | +0.046 |
sex | +0.045 |
iculture | +0.044 |
antly | +0.044 |
Neg logit contributions | |
oot | -0.078 |
MFT | -0.075 |
タ | -0.071 |
\": | -0.070 |
LM | -0.068 |
Token | prices |
Token ID | 4536 |
Feature activation | -1.06e-01 |
Pos logit contributions | |
weet | +0.033 |
orate | +0.024 |
sex | +0.023 |
iculture | +0.023 |
antly | +0.023 |
Neg logit contributions | |
oot | -0.041 |
MFT | -0.040 |
タ | -0.038 |
\": | -0.037 |
LM | -0.036 |
Token | were |
Token ID | 547 |
Feature activation | -3.77e-02 |
Pos logit contributions | |
weet | +0.073 |
orate | +0.054 |
sex | +0.053 |
antly | +0.052 |
redistributed | +0.052 |
Neg logit contributions | |
oot | -0.081 |
MFT | -0.077 |
タ | -0.073 |
\": | -0.073 |
LM | -0.070 |
Token | an |
Token ID | 281 |
Feature activation | +3.16e-03 |
Pos logit contributions | |
weet | +0.027 |
orate | +0.021 |
sex | +0.020 |
antly | +0.020 |
redistributed | +0.020 |
Neg logit contributions | |
oot | -0.027 |
MFT | -0.026 |
タ | -0.025 |
\": | -0.024 |
heim | -0.023 |
Token | insol |
Token ID | 35831 |
Feature activation | -7.20e-02 |
Pos logit contributions | |
oot | +0.002 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
heim | +0.002 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.002 |
redistributed | -0.002 |
antly | -0.002 |
sex | -0.002 |
Token | uble |
Token ID | 26664 |
Feature activation | +4.56e-01 |
Pos logit contributions | |
weet | +0.086 |
orate | +0.073 |
sex | +0.073 |
antly | +0.073 |
iculture | +0.072 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.016 |
\": | -0.013 |
タ | -0.013 |
LM | -0.011 |
Token | problem |
Token ID | 1917 |
Feature activation | +8.35e-02 |
Pos logit contributions | |
oot | +0.303 |
MFT | +0.287 |
タ | +0.269 |
\": | +0.267 |
LM | +0.255 |
Neg logit contributions | |
weet | -0.360 |
orate | -0.278 |
sex | -0.274 |
antly | -0.270 |
iculture | -0.270 |
Token | ; |
Token ID | 26 |
Feature activation | -1.69e-02 |
Pos logit contributions | |
oot | +0.061 |
MFT | +0.058 |
タ | +0.055 |
\": | +0.054 |
LM | +0.052 |
Neg logit contributions | |
weet | -0.061 |
orate | -0.046 |
sex | -0.045 |
antly | -0.044 |
redistributed | -0.044 |
Token | to |
Token ID | 284 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
weet | +0.012 |
orate | +0.009 |
sex | +0.009 |
iculture | +0.009 |
antly | +0.009 |
Neg logit contributions | |
oot | -0.012 |
MFT | -0.012 |
タ | -0.011 |
\": | -0.011 |
LM | -0.011 |
Token | Obama |
Token ID | 2486 |
Feature activation | +1.52e-01 |
Pos logit contributions | |
oot | +0.011 |
MFT | +0.011 |
タ | +0.010 |
\": | +0.010 |
LM | +0.009 |
Neg logit contributions | |
weet | -0.012 |
orate | -0.009 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | , |
Token ID | 11 |
Feature activation | -5.55e-03 |
Pos logit contributions | |
oot | +0.111 |
MFT | +0.105 |
タ | +0.099 |
\": | +0.098 |
LM | +0.095 |
Neg logit contributions | |
weet | -0.111 |
orate | -0.083 |
sex | -0.082 |
iculture | -0.081 |
antly | -0.081 |
Token | , |
Token ID | 11 |
Feature activation | +1.82e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.017 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Token | or |
Token ID | 393 |
Feature activation | +2.93e-02 |
Pos logit contributions | |
oot | +0.013 |
MFT | +0.012 |
タ | +0.012 |
\": | +0.012 |
LM | +0.011 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.010 |
sex | -0.010 |
antly | -0.010 |
iculture | -0.010 |
Token | the |
Token ID | 262 |
Feature activation | +1.98e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Token | watch |
Token ID | 2342 |
Feature activation | +7.21e-02 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.013 |
タ | +0.012 |
\": | +0.012 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.012 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | reb |
Token ID | 3405 |
Feature activation | -6.36e-02 |
Pos logit contributions | |
oot | +0.054 |
MFT | +0.052 |
タ | +0.049 |
\": | +0.049 |
LM | +0.047 |
Neg logit contributions | |
weet | -0.050 |
orate | -0.037 |
sex | -0.037 |
antly | -0.036 |
iculture | -0.036 |
Token | oots |
Token ID | 13880 |
Feature activation | +4.56e-01 |
Pos logit contributions | |
weet | +0.068 |
orate | +0.056 |
sex | +0.056 |
antly | +0.056 |
iculture | +0.055 |
Neg logit contributions | |
oot | -0.024 |
MFT | -0.023 |
タ | -0.020 |
\": | -0.020 |
LM | -0.018 |
Token | ( |
Token ID | 357 |
Feature activation | -8.14e-03 |
Pos logit contributions | |
oot | +0.329 |
MFT | +0.313 |
タ | +0.294 |
\": | +0.292 |
LM | +0.282 |
Neg logit contributions | |
weet | -0.334 |
orate | -0.252 |
sex | -0.248 |
iculture | -0.245 |
antly | -0.244 |
Token | all |
Token ID | 439 |
Feature activation | +6.91e-02 |
Pos logit contributions | |
weet | +0.006 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.005 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | of |
Token ID | 286 |
Feature activation | +1.04e-02 |
Pos logit contributions | |
oot | +0.049 |
MFT | +0.046 |
タ | +0.043 |
\": | +0.043 |
LM | +0.041 |
Neg logit contributions | |
weet | -0.052 |
orate | -0.040 |
sex | -0.039 |
antly | -0.038 |
iculture | -0.038 |
Token | them |
Token ID | 606 |
Feature activation | +2.33e-02 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.007 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.008 |
orate | -0.006 |
sex | -0.006 |
antly | -0.006 |
iculture | -0.006 |
Token | happened |
Token ID | 3022 |
Feature activation | +3.51e-02 |
Pos logit contributions | |
oot | +0.016 |
MFT | +0.015 |
タ | +0.014 |
\": | +0.014 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.018 |
orate | -0.014 |
sex | -0.014 |
antly | -0.013 |
iculture | -0.013 |
Token | ling |
Token ID | 1359 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
oot | +0.074 |
MFT | +0.070 |
タ | +0.067 |
\": | +0.066 |
LM | +0.064 |
Neg logit contributions | |
weet | -0.066 |
orate | -0.049 |
sex | -0.048 |
redistributed | -0.047 |
iculture | -0.047 |
Token | markets |
Token ID | 5939 |
Feature activation | -5.26e-02 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.007 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.007 |
orate | -0.005 |
sex | -0.005 |
iculture | -0.005 |
antly | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | -1.53e-03 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.028 |
sex | +0.028 |
antly | +0.028 |
iculture | +0.028 |
Neg logit contributions | |
oot | -0.038 |
MFT | -0.037 |
タ | -0.035 |
\": | -0.034 |
LM | -0.033 |
Token | drawing |
Token ID | 8263 |
Feature activation | -4.87e-02 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token | reb |
Token ID | 3405 |
Feature activation | -1.24e-01 |
Pos logit contributions | |
weet | +0.035 |
orate | +0.027 |
sex | +0.026 |
iculture | +0.026 |
antly | +0.026 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.034 |
タ | -0.032 |
\": | -0.032 |
LM | -0.030 |
Token | ukes |
Token ID | 31469 |
Feature activation | +4.47e-01 |
Pos logit contributions | |
weet | +0.138 |
orate | +0.116 |
antly | +0.114 |
sex | +0.114 |
iculture | +0.113 |
Neg logit contributions | |
MFT | -0.040 |
oot | -0.039 |
タ | -0.034 |
\": | -0.032 |
LM | -0.032 |
Token | from |
Token ID | 422 |
Feature activation | +6.98e-02 |
Pos logit contributions | |
oot | +0.309 |
MFT | +0.294 |
タ | +0.275 |
\": | +0.273 |
LM | +0.262 |
Neg logit contributions | |
weet | -0.342 |
orate | -0.261 |
sex | -0.257 |
antly | -0.254 |
iculture | -0.253 |
Token | some |
Token ID | 617 |
Feature activation | -8.62e-02 |
Pos logit contributions | |
oot | +0.052 |
MFT | +0.049 |
タ | +0.046 |
\": | +0.046 |
LM | +0.044 |
Neg logit contributions | |
weet | -0.050 |
orate | -0.037 |
sex | -0.037 |
antly | -0.036 |
iculture | -0.036 |
Token | Republicans |
Token ID | 4734 |
Feature activation | -6.60e-03 |
Pos logit contributions | |
weet | +0.064 |
orate | +0.048 |
sex | +0.048 |
antly | +0.047 |
iculture | +0.047 |
Neg logit contributions | |
oot | -0.061 |
MFT | -0.058 |
タ | -0.055 |
\": | -0.054 |
LM | -0.052 |
Token | . |
Token ID | 13 |
Feature activation | +2.28e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.005 |
MFT | -0.005 |
タ | -0.004 |
\": | -0.004 |
LM | -0.004 |
Token |
|
Token ID | 198 |
Feature activation | +1.78e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.016 |
orate | -0.012 |
sex | -0.012 |
iculture | -0.012 |
antly | -0.012 |
Token | so |
Token ID | 523 |
Feature activation | -9.91e-02 |
Pos logit contributions | |
oot | +0.002 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token | when |
Token ID | 618 |
Feature activation | -6.33e-02 |
Pos logit contributions | |
weet | +0.074 |
orate | +0.056 |
sex | +0.055 |
antly | +0.055 |
iculture | +0.054 |
Neg logit contributions | |
oot | -0.070 |
MFT | -0.067 |
タ | -0.063 |
\": | -0.062 |
LM | -0.060 |
Token | the |
Token ID | 262 |
Feature activation | +2.23e-02 |
Pos logit contributions | |
weet | +0.048 |
orate | +0.036 |
sex | +0.036 |
antly | +0.035 |
iculture | +0.035 |
Neg logit contributions | |
oot | -0.044 |
MFT | -0.042 |
タ | -0.040 |
\": | -0.039 |
LM | -0.038 |
Token | watch |
Token ID | 2342 |
Feature activation | +8.98e-02 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.015 |
タ | +0.014 |
\": | +0.014 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.013 |
antly | -0.013 |
iculture | -0.013 |
Token | reb |
Token ID | 3405 |
Feature activation | -7.62e-02 |
Pos logit contributions | |
oot | +0.063 |
MFT | +0.060 |
タ | +0.056 |
\": | +0.056 |
LM | +0.053 |
Neg logit contributions | |
weet | -0.067 |
orate | -0.051 |
sex | -0.051 |
antly | -0.050 |
iculture | -0.050 |
Token | oots |
Token ID | 13880 |
Feature activation | +4.45e-01 |
Pos logit contributions | |
weet | +0.081 |
orate | +0.067 |
sex | +0.067 |
antly | +0.066 |
iculture | +0.066 |
Neg logit contributions | |
oot | -0.028 |
MFT | -0.028 |
タ | -0.025 |
\": | -0.024 |
LM | -0.022 |
Token | , |
Token ID | 11 |
Feature activation | +6.26e-03 |
Pos logit contributions | |
oot | +0.321 |
MFT | +0.305 |
タ | +0.287 |
\": | +0.284 |
LM | +0.275 |
Neg logit contributions | |
weet | -0.326 |
orate | -0.246 |
sex | -0.242 |
iculture | -0.240 |
antly | -0.238 |
Token | the |
Token ID | 262 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
oot | +0.004 |
MFT | +0.004 |
タ | +0.004 |
\": | +0.004 |
LM | +0.004 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.003 |
antly | -0.003 |
iculture | -0.003 |
Token | key |
Token ID | 1994 |
Feature activation | +1.68e-01 |
Pos logit contributions | |
oot | +0.011 |
MFT | +0.010 |
タ | +0.010 |
\": | +0.010 |
LM | +0.009 |
Neg logit contributions | |
weet | -0.012 |
orate | -0.009 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | gets |
Token ID | 3011 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
oot | +0.110 |
MFT | +0.105 |
タ | +0.098 |
\": | +0.097 |
heim | +0.093 |
Neg logit contributions | |
weet | -0.134 |
orate | -0.104 |
sex | -0.103 |
antly | -0.102 |
redistributed | -0.100 |
Token | released |
Token ID | 2716 |
Feature activation | -2.05e-02 |
Pos logit contributions | |
oot | +0.084 |
MFT | +0.080 |
タ | +0.076 |
\": | +0.076 |
heim | +0.073 |
Neg logit contributions | |
weet | -0.065 |
orate | -0.046 |
sex | -0.046 |
redistributed | -0.045 |
antly | -0.045 |
Token | . |
Token ID | 13 |
Feature activation | +1.45e-02 |
Pos logit contributions | |
weet | +0.206 |
orate | +0.154 |
sex | +0.153 |
iculture | +0.150 |
antly | +0.150 |
Neg logit contributions | |
oot | -0.205 |
MFT | -0.196 |
タ | -0.185 |
\": | -0.184 |
LM | -0.176 |
Token | " |
Token ID | 366 |
Feature activation | -1.59e-02 |
Pos logit contributions | |
oot | +0.010 |
MFT | +0.010 |
タ | +0.009 |
\": | +0.009 |
LM | +0.009 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.008 |
sex | -0.008 |
antly | -0.008 |
iculture | -0.008 |
Token | It |
Token ID | 1026 |
Feature activation | +3.92e-02 |
Pos logit contributions | |
weet | +0.012 |
orate | +0.009 |
sex | +0.009 |
antly | +0.008 |
iculture | +0.008 |
Neg logit contributions | |
oot | -0.012 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | 's |
Token ID | 338 |
Feature activation | +6.64e-02 |
Pos logit contributions | |
oot | +0.027 |
MFT | +0.026 |
タ | +0.024 |
\": | +0.024 |
LM | +0.023 |
Neg logit contributions | |
weet | -0.030 |
orate | -0.023 |
sex | -0.023 |
antly | -0.022 |
iculture | -0.022 |
Token | horrific |
Token ID | 19447 |
Feature activation | -1.14e-01 |
Pos logit contributions | |
oot | +0.050 |
MFT | +0.047 |
タ | +0.045 |
\": | +0.044 |
LM | +0.043 |
Neg logit contributions | |
weet | -0.047 |
orate | -0.035 |
sex | -0.034 |
antly | -0.034 |
iculture | -0.034 |
Token | ally |
Token ID | 453 |
Feature activation | +4.39e-01 |
Pos logit contributions | |
weet | +0.085 |
orate | +0.065 |
sex | +0.064 |
antly | +0.063 |
iculture | +0.063 |
Neg logit contributions | |
oot | -0.080 |
MFT | -0.076 |
タ | -0.072 |
\": | -0.071 |
LM | -0.068 |
Token | challenging |
Token ID | 9389 |
Feature activation | -7.03e-02 |
Pos logit contributions | |
oot | +0.318 |
MFT | +0.303 |
タ | +0.284 |
\": | +0.282 |
LM | +0.272 |
Neg logit contributions | |
weet | -0.320 |
orate | -0.242 |
sex | -0.237 |
iculture | -0.236 |
antly | -0.235 |
Token | to |
Token ID | 284 |
Feature activation | -6.68e-03 |
Pos logit contributions | |
weet | +0.051 |
orate | +0.038 |
sex | +0.038 |
antly | +0.037 |
iculture | +0.037 |
Neg logit contributions | |
oot | -0.051 |
MFT | -0.049 |
タ | -0.046 |
\": | -0.046 |
LM | -0.044 |
Token | move |
Token ID | 1445 |
Feature activation | -1.36e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.005 |
MFT | -0.004 |
タ | -0.004 |
\": | -0.004 |
LM | -0.004 |
Token | these |
Token ID | 777 |
Feature activation | +1.65e-02 |
Pos logit contributions | |
weet | +0.010 |
orate | +0.008 |
sex | +0.007 |
iculture | +0.007 |
antly | +0.007 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.009 |
タ | -0.009 |
\": | -0.009 |
LM | -0.008 |
Token | start |
Token ID | 923 |
Feature activation | -4.83e-03 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
タ | +0.011 |
\": | +0.010 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.012 |
orate | -0.009 |
sex | -0.009 |
iculture | -0.009 |
antly | -0.009 |
Token | , |
Token ID | 11 |
Feature activation | +4.60e-04 |
Pos logit contributions | |
oot | +0.027 |
MFT | +0.026 |
タ | +0.025 |
\": | +0.024 |
heim | +0.023 |
Neg logit contributions | |
weet | -0.027 |
orate | -0.020 |
sex | -0.020 |
antly | -0.020 |
iculture | -0.019 |
Token | � |
Token ID | 447 |
Feature activation | +2.18e-02 |
Pos logit contributions | |
oot | +0.000 |
MFT | +0.000 |
タ | +0.000 |
\": | +0.000 |
LM | +0.000 |
Neg logit contributions | |
weet | -0.000 |
orate | -0.000 |
sex | -0.000 |
antly | -0.000 |
iculture | -0.000 |
Token | � |
Token ID | 251 |
Feature activation | +2.98e-02 |
Pos logit contributions | |
oot | +0.010 |
MFT | +0.010 |
タ | +0.009 |
\": | +0.009 |
LM | +0.008 |
Neg logit contributions | |
weet | -0.021 |
orate | -0.017 |
sex | -0.017 |
iculture | -0.017 |
antly | -0.017 |
Token | said |
Token ID | 531 |
Feature activation | +7.47e-02 |
Pos logit contributions | |
oot | +0.022 |
MFT | +0.021 |
タ | +0.020 |
\": | +0.019 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Token | Alexand |
Token ID | 21000 |
Feature activation | +1.19e-01 |
Pos logit contributions | |
oot | +0.057 |
MFT | +0.054 |
タ | +0.051 |
\": | +0.051 |
LM | +0.049 |
Neg logit contributions | |
weet | -0.052 |
orate | -0.039 |
sex | -0.038 |
antly | -0.037 |
iculture | -0.037 |
Token | rine |
Token ID | 7640 |
Feature activation | +4.36e-01 |
Pos logit contributions | |
oot | +0.077 |
MFT | +0.073 |
タ | +0.068 |
\": | +0.067 |
LM | +0.064 |
Neg logit contributions | |
weet | -0.096 |
orate | -0.075 |
sex | -0.074 |
iculture | -0.073 |
antly | -0.073 |
Token | Lat |
Token ID | 5476 |
Feature activation | +6.90e-02 |
Pos logit contributions | |
oot | +0.326 |
MFT | +0.311 |
タ | +0.293 |
\": | +0.291 |
LM | +0.280 |
Neg logit contributions | |
weet | -0.308 |
orate | -0.230 |
sex | -0.226 |
antly | -0.223 |
iculture | -0.222 |
Token | end |
Token ID | 437 |
Feature activation | -8.12e-02 |
Pos logit contributions | |
oot | +0.045 |
MFT | +0.044 |
タ | +0.041 |
\": | +0.041 |
LM | +0.039 |
Neg logit contributions | |
weet | -0.055 |
orate | -0.042 |
sex | -0.042 |
antly | -0.041 |
iculture | -0.041 |
Token | res |
Token ID | 411 |
Feature activation | +4.63e-02 |
Pos logit contributions | |
weet | +0.061 |
orate | +0.047 |
sex | +0.046 |
antly | +0.045 |
iculture | +0.045 |
Neg logit contributions | |
oot | -0.057 |
MFT | -0.054 |
タ | -0.051 |
\": | -0.051 |
LM | -0.048 |
Token | se |
Token ID | 325 |
Feature activation | +6.17e-02 |
Pos logit contributions | |
oot | +0.032 |
MFT | +0.030 |
タ | +0.028 |
\": | +0.028 |
LM | +0.027 |
Neg logit contributions | |
weet | -0.035 |
orate | -0.027 |
sex | -0.027 |
antly | -0.026 |
iculture | -0.026 |
Token | , |
Token ID | 11 |
Feature activation | +1.99e-02 |
Pos logit contributions | |
oot | +0.046 |
MFT | +0.044 |
タ | +0.041 |
\": | +0.041 |
LM | +0.039 |
Neg logit contributions | |
weet | -0.044 |
orate | -0.032 |
sex | -0.032 |
antly | -0.032 |
iculture | -0.031 |
Token | Japan |
Token ID | 2869 |
Feature activation | -7.73e-03 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.020 |
sex | +0.019 |
antly | +0.019 |
iculture | +0.019 |
Neg logit contributions | |
oot | -0.026 |
MFT | -0.025 |
タ | -0.023 |
\": | -0.023 |
LM | -0.022 |
Token | near |
Token ID | 1474 |
Feature activation | +1.22e-01 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
iculture | +0.004 |
antly | +0.004 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.006 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | the |
Token ID | 262 |
Feature activation | +9.13e-04 |
Pos logit contributions | |
oot | +0.091 |
MFT | +0.086 |
タ | +0.082 |
\": | +0.081 |
LM | +0.078 |
Neg logit contributions | |
weet | -0.086 |
orate | -0.064 |
sex | -0.063 |
antly | -0.062 |
iculture | -0.062 |
Token | plant |
Token ID | 4618 |
Feature activation | -9.36e-03 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.001 |
orate | -0.001 |
sex | -0.001 |
antly | -0.000 |
iculture | -0.000 |
Token | unin |
Token ID | 26329 |
Feature activation | -2.04e-01 |
Pos logit contributions | |
weet | +0.006 |
orate | +0.005 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.007 |
MFT | -0.007 |
タ | -0.007 |
\": | -0.007 |
LM | -0.006 |
Token | hab |
Token ID | 5976 |
Feature activation | +4.26e-01 |
Pos logit contributions | |
weet | +0.251 |
orate | +0.215 |
sex | +0.213 |
antly | +0.211 |
iculture | +0.211 |
Neg logit contributions | |
oot | -0.045 |
MFT | -0.038 |
タ | -0.030 |
\": | -0.029 |
LM | -0.024 |
Token | itable |
Token ID | 4674 |
Feature activation | +9.60e-02 |
Pos logit contributions | |
oot | +0.252 |
MFT | +0.242 |
タ | +0.224 |
\": | +0.221 |
LM | +0.212 |
Neg logit contributions | |
weet | -0.367 |
orate | -0.289 |
sex | -0.284 |
antly | -0.282 |
iculture | -0.281 |
Token | , |
Token ID | 11 |
Feature activation | -7.80e-03 |
Pos logit contributions | |
oot | +0.070 |
MFT | +0.066 |
タ | +0.063 |
\": | +0.062 |
LM | +0.060 |
Neg logit contributions | |
weet | -0.070 |
orate | -0.052 |
sex | -0.052 |
antly | -0.051 |
iculture | -0.051 |
Token | at |
Token ID | 379 |
Feature activation | -4.29e-02 |
Pos logit contributions | |
weet | +0.006 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.005 |
MFT | -0.005 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | least |
Token ID | 1551 |
Feature activation | -3.97e-02 |
Pos logit contributions | |
weet | +0.031 |
orate | +0.023 |
sex | +0.023 |
antly | +0.022 |
iculture | +0.022 |
Neg logit contributions | |
oot | -0.032 |
MFT | -0.030 |
タ | -0.029 |
\": | -0.028 |
LM | -0.027 |
Token | for |
Token ID | 329 |
Feature activation | -2.33e-02 |
Pos logit contributions | |
weet | +0.030 |
orate | +0.023 |
sex | +0.022 |
antly | +0.022 |
iculture | +0.022 |
Neg logit contributions | |
oot | -0.028 |
MFT | -0.027 |
タ | -0.025 |
\": | -0.025 |
LM | -0.024 |
Token | Centre |
Token ID | 9072 |
Feature activation | +5.73e-02 |
Pos logit contributions | |
oot | +0.085 |
MFT | +0.081 |
タ | +0.076 |
\": | +0.076 |
LM | +0.073 |
Neg logit contributions | |
weet | -0.088 |
orate | -0.067 |
sex | -0.066 |
antly | -0.065 |
iculture | -0.065 |
Token | for |
Token ID | 329 |
Feature activation | +4.66e-03 |
Pos logit contributions | |
oot | +0.041 |
MFT | +0.039 |
タ | +0.037 |
\": | +0.036 |
LM | +0.035 |
Neg logit contributions | |
weet | -0.042 |
orate | -0.032 |
sex | -0.032 |
antly | -0.031 |
iculture | -0.031 |
Token | Psychological |
Token ID | 30583 |
Feature activation | -9.57e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.003 |
heim | +0.003 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.003 |
sex | -0.003 |
antly | -0.002 |
iculture | -0.002 |
Token | Services |
Token ID | 6168 |
Feature activation | -5.45e-02 |
Pos logit contributions | |
weet | +0.065 |
orate | +0.048 |
sex | +0.047 |
antly | +0.047 |
iculture | +0.047 |
Neg logit contributions | |
oot | -0.074 |
MFT | -0.070 |
タ | -0.067 |
\": | -0.066 |
LM | -0.064 |
Token | in |
Token ID | 287 |
Feature activation | -2.81e-03 |
Pos logit contributions | |
weet | +0.039 |
orate | +0.030 |
sex | +0.029 |
antly | +0.029 |
iculture | +0.029 |
Neg logit contributions | |
oot | -0.040 |
MFT | -0.038 |
タ | -0.036 |
\": | -0.035 |
LM | -0.034 |
Token | Rath |
Token ID | 26494 |
Feature activation | +4.25e-01 |
Pos logit contributions | |
weet | +0.002 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.002 |
MFT | -0.002 |
タ | -0.002 |
\": | -0.002 |
LM | -0.002 |
Token | gar |
Token ID | 4563 |
Feature activation | -2.13e-01 |
Pos logit contributions | |
oot | +0.268 |
MFT | +0.257 |
タ | +0.239 |
\": | +0.238 |
LM | +0.225 |
Neg logit contributions | |
weet | -0.350 |
orate | -0.272 |
sex | -0.269 |
iculture | -0.266 |
antly | -0.265 |
Token | . |
Token ID | 13 |
Feature activation | -1.49e-02 |
Pos logit contributions | |
weet | +0.164 |
orate | +0.125 |
sex | +0.124 |
iculture | +0.122 |
antly | +0.122 |
Neg logit contributions | |
oot | -0.145 |
MFT | -0.140 |
タ | -0.131 |
\": | -0.131 |
LM | -0.124 |
Token | Upon |
Token ID | 14438 |
Feature activation | -4.01e-02 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.008 |
sex | +0.008 |
antly | +0.008 |
iculture | +0.008 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | contacting |
Token ID | 27390 |
Feature activation | -9.10e-02 |
Pos logit contributions | |
weet | +0.030 |
orate | +0.022 |
sex | +0.022 |
antly | +0.022 |
iculture | +0.022 |
Neg logit contributions | |
oot | -0.029 |
MFT | -0.027 |
タ | -0.026 |
\": | -0.026 |
heim | -0.024 |
Token | the |
Token ID | 262 |
Feature activation | +3.03e-02 |
Pos logit contributions | |
weet | +0.067 |
orate | +0.051 |
sex | +0.050 |
antly | +0.050 |
iculture | +0.050 |
Neg logit contributions | |
oot | -0.065 |
MFT | -0.061 |
タ | -0.058 |
\": | -0.057 |
heim | -0.055 |
Token | - |
Token ID | 12 |
Feature activation | -3.72e-03 |
Pos logit contributions | |
weet | +0.021 |
orate | +0.016 |
sex | +0.016 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.021 |
MFT | -0.020 |
タ | -0.019 |
\": | -0.018 |
LM | -0.018 |
Token | by |
Token ID | 1525 |
Feature activation | -2.75e-02 |
Pos logit contributions | |
weet | +0.003 |
orate | +0.002 |
sex | +0.002 |
antly | +0.002 |
iculture | +0.002 |
Neg logit contributions | |
oot | -0.003 |
MFT | -0.003 |
タ | -0.002 |
\": | -0.002 |
LM | -0.002 |
Token | - |
Token ID | 12 |
Feature activation | -1.21e-03 |
Pos logit contributions | |
weet | +0.020 |
orate | +0.016 |
sex | +0.015 |
antly | +0.015 |
iculture | +0.015 |
Neg logit contributions | |
oot | -0.020 |
MFT | -0.019 |
タ | -0.018 |
\": | -0.017 |
LM | -0.017 |
Token | side |
Token ID | 1589 |
Feature activation | +1.01e-01 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token | M |
Token ID | 337 |
Feature activation | +1.03e-01 |
Pos logit contributions | |
oot | +0.081 |
MFT | +0.078 |
タ | +0.074 |
\": | +0.073 |
LM | +0.070 |
Neg logit contributions | |
weet | -0.066 |
orate | -0.048 |
sex | -0.047 |
antly | -0.046 |
iculture | -0.046 |
Token | TF |
Token ID | 10234 |
Feature activation | +4.24e-01 |
Pos logit contributions | |
oot | +0.028 |
MFT | +0.025 |
タ | +0.021 |
\": | +0.021 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.120 |
orate | -0.102 |
sex | -0.101 |
antly | -0.100 |
iculture | -0.100 |
Token | charts |
Token ID | 15907 |
Feature activation | +1.64e-01 |
Pos logit contributions | |
oot | +0.297 |
MFT | +0.283 |
タ | +0.267 |
\": | +0.264 |
LM | +0.252 |
Neg logit contributions | |
weet | -0.318 |
orate | -0.241 |
sex | -0.238 |
antly | -0.235 |
iculture | -0.234 |
Token | of |
Token ID | 286 |
Feature activation | +5.10e-03 |
Pos logit contributions | |
oot | +0.125 |
MFT | +0.119 |
タ | +0.113 |
\": | +0.112 |
LM | +0.108 |
Neg logit contributions | |
weet | -0.113 |
orate | -0.084 |
sex | -0.082 |
antly | -0.081 |
iculture | -0.081 |
Token | the |
Token ID | 262 |
Feature activation | +9.61e-03 |
Pos logit contributions | |
oot | +0.004 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.003 |
heim | +0.003 |
Neg logit contributions | |
weet | -0.004 |
orate | -0.003 |
sex | -0.003 |
antly | -0.003 |
redistributed | -0.003 |
Token | 16 |
Token ID | 1467 |
Feature activation | +3.66e-02 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.006 |
heim | +0.006 |
Neg logit contributions | |
weet | -0.007 |
orate | -0.005 |
sex | -0.005 |
antly | -0.005 |
redistributed | -0.005 |
Token | - |
Token ID | 12 |
Feature activation | +1.19e-02 |
Pos logit contributions | |
oot | +0.026 |
MFT | +0.024 |
タ | +0.023 |
\": | +0.023 |
heim | +0.022 |
Neg logit contributions | |
weet | -0.027 |
orate | -0.021 |
sex | -0.021 |
antly | -0.020 |
iculture | -0.020 |
Token | flavours |
Token ID | 46946 |
Feature activation | -7.17e-02 |
Pos logit contributions | |
oot | +0.051 |
MFT | +0.048 |
タ | +0.045 |
\": | +0.045 |
heim | +0.043 |
Neg logit contributions | |
weet | -0.052 |
orate | -0.039 |
sex | -0.038 |
antly | -0.038 |
iculture | -0.038 |
Token | and |
Token ID | 290 |
Feature activation | +4.05e-02 |
Pos logit contributions | |
weet | +0.053 |
orate | +0.040 |
sex | +0.039 |
antly | +0.039 |
iculture | +0.039 |
Neg logit contributions | |
oot | -0.051 |
MFT | -0.049 |
タ | -0.046 |
\": | -0.046 |
LM | -0.044 |
Token | arom |
Token ID | 19731 |
Feature activation | +9.35e-03 |
Pos logit contributions | |
oot | +0.031 |
MFT | +0.029 |
タ | +0.028 |
\": | +0.028 |
LM | +0.027 |
Neg logit contributions | |
weet | -0.028 |
orate | -0.021 |
sex | -0.020 |
antly | -0.020 |
iculture | -0.020 |
Token | as |
Token ID | 292 |
Feature activation | +6.92e-02 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.006 |
タ | +0.006 |
\": | +0.006 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.007 |
orate | -0.005 |
sex | -0.005 |
antly | -0.005 |
iculture | -0.005 |
Token | of |
Token ID | 286 |
Feature activation | +6.47e-02 |
Pos logit contributions | |
oot | +0.049 |
MFT | +0.047 |
タ | +0.044 |
\": | +0.044 |
LM | +0.042 |
Neg logit contributions | |
weet | -0.052 |
orate | -0.039 |
sex | -0.038 |
antly | -0.038 |
iculture | -0.038 |
Token | Sheep |
Token ID | 35418 |
Feature activation | +4.23e-01 |
Pos logit contributions | |
oot | +0.046 |
MFT | +0.044 |
タ | +0.042 |
\": | +0.041 |
heim | +0.039 |
Neg logit contributions | |
weet | -0.048 |
orate | -0.036 |
sex | -0.035 |
antly | -0.035 |
iculture | -0.035 |
Token | Steal |
Token ID | 47242 |
Feature activation | +5.64e-02 |
Pos logit contributions | |
oot | +0.302 |
MFT | +0.287 |
タ | +0.270 |
\": | +0.268 |
LM | +0.258 |
Neg logit contributions | |
weet | -0.313 |
orate | -0.237 |
sex | -0.233 |
iculture | -0.231 |
antly | -0.231 |
Token | er |
Token ID | 263 |
Feature activation | +1.45e-01 |
Pos logit contributions | |
oot | +0.031 |
MFT | +0.029 |
タ | +0.027 |
\": | +0.026 |
LM | +0.025 |
Neg logit contributions | |
weet | -0.051 |
orate | -0.041 |
sex | -0.041 |
antly | -0.040 |
iculture | -0.040 |
Token | with |
Token ID | 351 |
Feature activation | +2.96e-02 |
Pos logit contributions | |
oot | +0.101 |
MFT | +0.096 |
タ | +0.090 |
\": | +0.089 |
LM | +0.086 |
Neg logit contributions | |
weet | -0.109 |
orate | -0.083 |
sex | -0.082 |
antly | -0.081 |
iculture | -0.081 |
Token | so |
Token ID | 523 |
Feature activation | -6.50e-02 |
Pos logit contributions | |
oot | +0.022 |
MFT | +0.021 |
タ | +0.020 |
\": | +0.020 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.021 |
orate | -0.015 |
sex | -0.015 |
iculture | -0.015 |
antly | -0.015 |
Token | much |
Token ID | 881 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
weet | +0.048 |
orate | +0.036 |
sex | +0.036 |
antly | +0.035 |
iculture | +0.035 |
Neg logit contributions | |
oot | -0.046 |
MFT | -0.044 |
タ | -0.041 |
\": | -0.041 |
LM | -0.040 |
Token | panic |
Token ID | 13619 |
Feature activation | +5.03e-02 |
Pos logit contributions | |
weet | +0.029 |
orate | +0.021 |
sex | +0.021 |
antly | +0.021 |
iculture | +0.020 |
Neg logit contributions | |
oot | -0.032 |
MFT | -0.031 |
タ | -0.029 |
\": | -0.029 |
LM | -0.028 |
Token | ), |
Token ID | 828 |
Feature activation | +9.89e-02 |
Pos logit contributions | |
oot | +0.034 |
MFT | +0.033 |
タ | +0.031 |
\": | +0.030 |
LM | +0.029 |
Neg logit contributions | |
weet | -0.039 |
orate | -0.030 |
sex | -0.029 |
antly | -0.029 |
iculture | -0.029 |
Token | it |
Token ID | 340 |
Feature activation | +2.23e-02 |
Pos logit contributions | |
oot | +0.071 |
MFT | +0.068 |
タ | +0.064 |
\": | +0.063 |
LM | +0.061 |
Neg logit contributions | |
weet | -0.073 |
orate | -0.055 |
sex | -0.054 |
antly | -0.053 |
iculture | -0.053 |
Token | automatically |
Token ID | 6338 |
Feature activation | +5.19e-02 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.014 |
タ | +0.014 |
\": | +0.013 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.013 |
antly | -0.013 |
redistributed | -0.013 |
Token | reb |
Token ID | 3405 |
Feature activation | -6.32e-02 |
Pos logit contributions | |
oot | +0.037 |
MFT | +0.035 |
タ | +0.033 |
\": | +0.033 |
LM | +0.032 |
Neg logit contributions | |
weet | -0.038 |
orate | -0.029 |
sex | -0.028 |
antly | -0.028 |
iculture | -0.028 |
Token | oots |
Token ID | 13880 |
Feature activation | +4.23e-01 |
Pos logit contributions | |
weet | +0.068 |
orate | +0.056 |
sex | +0.056 |
antly | +0.055 |
iculture | +0.055 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.020 |
\": | -0.019 |
LM | -0.018 |
Token | without |
Token ID | 1231 |
Feature activation | -6.11e-03 |
Pos logit contributions | |
oot | +0.305 |
MFT | +0.290 |
タ | +0.271 |
\": | +0.269 |
LM | +0.261 |
Neg logit contributions | |
weet | -0.312 |
orate | -0.236 |
sex | -0.231 |
iculture | -0.229 |
antly | -0.228 |
Token | spending |
Token ID | 4581 |
Feature activation | -4.39e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.003 |
sex | +0.003 |
antly | +0.003 |
redistributed | +0.003 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.004 |
タ | -0.004 |
\": | -0.004 |
heim | -0.004 |
Token | the |
Token ID | 262 |
Feature activation | +2.00e-02 |
Pos logit contributions | |
weet | +0.033 |
orate | +0.025 |
sex | +0.025 |
iculture | +0.025 |
antly | +0.025 |
Neg logit contributions | |
oot | -0.031 |
MFT | -0.029 |
タ | -0.027 |
\": | -0.027 |
LM | -0.026 |
Token | DE |
Token ID | 5550 |
Feature activation | +1.38e-01 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.013 |
タ | +0.012 |
\": | +0.012 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.012 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | FAULT |
Token ID | 38865 |
Feature activation | +2.10e-01 |
Pos logit contributions | |
oot | +0.080 |
MFT | +0.075 |
\": | +0.070 |
タ | +0.070 |
heim | +0.066 |
Neg logit contributions | |
weet | -0.120 |
orate | -0.095 |
sex | -0.094 |
redistributed | -0.093 |
antly | -0.093 |
Token | anti |
Token ID | 17096 |
Feature activation | -8.98e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.023 |
orate | -0.017 |
sex | -0.017 |
antly | -0.017 |
iculture | -0.017 |
Token | - |
Token ID | 12 |
Feature activation | +4.06e-02 |
Pos logit contributions | |
weet | +0.069 |
orate | +0.053 |
sex | +0.052 |
iculture | +0.052 |
antly | +0.052 |
Neg logit contributions | |
oot | -0.061 |
MFT | -0.058 |
タ | -0.055 |
\": | -0.054 |
LM | -0.052 |
Token | trump |
Token ID | 40954 |
Feature activation | +1.65e-01 |
Pos logit contributions | |
oot | +0.028 |
MFT | +0.026 |
タ | +0.024 |
\": | +0.024 |
LM | +0.023 |
Neg logit contributions | |
weet | -0.032 |
orate | -0.024 |
sex | -0.024 |
antly | -0.024 |
iculture | -0.023 |
Token | - |
Token ID | 12 |
Feature activation | +5.08e-02 |
Pos logit contributions | |
oot | +0.114 |
MFT | +0.108 |
タ | +0.102 |
\": | +0.101 |
LM | +0.097 |
Neg logit contributions | |
weet | -0.125 |
orate | -0.096 |
sex | -0.094 |
iculture | -0.094 |
antly | -0.094 |
Token | t |
Token ID | 83 |
Feature activation | +2.35e-02 |
Pos logit contributions | |
oot | +0.036 |
MFT | +0.034 |
タ | +0.032 |
\": | +0.032 |
LM | +0.030 |
Neg logit contributions | |
weet | -0.038 |
orate | -0.029 |
sex | -0.029 |
antly | -0.028 |
iculture | -0.028 |
Token | weet |
Token ID | 7277 |
Feature activation | -6.19e-01 |
Pos logit contributions | |
oot | +0.033 |
MFT | +0.032 |
タ | +0.031 |
\": | +0.031 |
LM | +0.030 |
Neg logit contributions | |
weet | -0.001 |
orate | +0.003 |
iculture | +0.003 |
sex | +0.003 |
redistributed | +0.003 |
Token | / |
Token ID | 14 |
Feature activation | -2.03e-02 |
Pos logit contributions | |
weet | +0.463 |
orate | +0.350 |
sex | +0.345 |
antly | +0.340 |
iculture | +0.339 |
Neg logit contributions | |
oot | -0.439 |
MFT | -0.418 |
タ | -0.393 |
\": | -0.389 |
LM | -0.374 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +2.36e-03 |
Pos logit contributions | |
weet | +0.015 |
orate | +0.011 |
sex | +0.011 |
antly | +0.011 |
iculture | +0.011 |
Neg logit contributions | |
oot | -0.015 |
MFT | -0.014 |
タ | -0.013 |
\": | -0.013 |
LM | -0.013 |
Token | Story |
Token ID | 11605 |
Feature activation | -3.53e-02 |
Pos logit contributions | |
oot | +0.002 |
MFT | +0.002 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.001 |
sex | -0.001 |
antly | -0.001 |
iculture | -0.001 |
Token | highlights |
Token ID | 11330 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.020 |
sex | +0.019 |
antly | +0.019 |
iculture | +0.019 |
Neg logit contributions | |
oot | -0.025 |
MFT | -0.024 |
タ | -0.023 |
\": | -0.022 |
LM | -0.022 |
Token | Secretary |
Token ID | 4986 |
Feature activation | +4.19e-02 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.012 |
タ | +0.011 |
\": | +0.011 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.008 |
sex | -0.008 |
antly | -0.008 |
iculture | -0.008 |
Token | weet |
Token ID | 7277 |
Feature activation | -5.53e-01 |
Pos logit contributions | |
weet | +0.000 |
orate | -0.014 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.015 |
Neg logit contributions | |
oot | -0.110 |
MFT | -0.107 |
タ | -0.104 |
\": | -0.104 |
LM | -0.102 |
Token | one |
Token ID | 530 |
Feature activation | -2.48e-02 |
Pos logit contributions | |
weet | +0.402 |
orate | +0.302 |
sex | +0.296 |
antly | +0.293 |
iculture | +0.291 |
Neg logit contributions | |
oot | -0.403 |
MFT | -0.385 |
タ | -0.364 |
\": | -0.361 |
LM | -0.345 |
Token | . |
Token ID | 13 |
Feature activation | +3.68e-03 |
Pos logit contributions | |
weet | +0.018 |
orate | +0.014 |
sex | +0.014 |
antly | +0.013 |
redistributed | +0.013 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.017 |
タ | -0.016 |
\": | -0.016 |
LM | -0.015 |
Token | A |
Token ID | 317 |
Feature activation | -4.86e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
iculture | -0.002 |
antly | -0.002 |
Token | bitters |
Token ID | 48666 |
Feature activation | -1.15e-01 |
Pos logit contributions | |
weet | +0.036 |
orate | +0.027 |
sex | +0.027 |
antly | +0.026 |
iculture | +0.026 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.033 |
タ | -0.031 |
\": | -0.031 |
LM | -0.030 |
Token | weet |
Token ID | 7277 |
Feature activation | -5.81e-01 |
Pos logit contributions | |
weet | +0.000 |
orate | -0.020 |
sex | -0.021 |
iculture | -0.022 |
antly | -0.022 |
Neg logit contributions | |
oot | -0.166 |
MFT | -0.162 |
タ | -0.158 |
\": | -0.157 |
LM | -0.154 |
Token | frenzy |
Token ID | 31934 |
Feature activation | -5.14e-02 |
Pos logit contributions | |
weet | +0.416 |
orate | +0.309 |
sex | +0.304 |
antly | +0.301 |
iculture | +0.299 |
Neg logit contributions | |
oot | -0.430 |
MFT | -0.411 |
タ | -0.390 |
\": | -0.385 |
LM | -0.369 |
Token | ? |
Token ID | 30 |
Feature activation | +1.62e-03 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.029 |
sex | +0.028 |
antly | +0.028 |
iculture | +0.028 |
Neg logit contributions | |
oot | -0.037 |
MFT | -0.035 |
タ | -0.033 |
\": | -0.033 |
LM | -0.031 |
Token | Sure |
Token ID | 10889 |
Feature activation | -2.47e-02 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.001 |
orate | -0.001 |
sex | -0.001 |
antly | -0.001 |
iculture | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | +2.46e-02 |
Pos logit contributions | |
weet | +0.018 |
orate | +0.013 |
sex | +0.013 |
antly | +0.013 |
iculture | +0.013 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.017 |
タ | -0.016 |
\": | -0.016 |
LM | -0.015 |
Token | why |
Token ID | 1521 |
Feature activation | +4.58e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.019 |
orate | -0.014 |
sex | -0.014 |
antly | -0.014 |
iculture | -0.014 |
Token | — |
Token ID | 960 |
Feature activation | +2.74e-02 |
Pos logit contributions | |
weet | +0.046 |
orate | +0.035 |
sex | +0.034 |
antly | +0.034 |
iculture | +0.034 |
Neg logit contributions | |
oot | -0.045 |
MFT | -0.043 |
タ | -0.041 |
\": | -0.040 |
LM | -0.039 |
Token | albeit |
Token ID | 45781 |
Feature activation | -7.81e-02 |
Pos logit contributions | |
oot | +0.019 |
MFT | +0.018 |
タ | +0.017 |
\": | +0.017 |
LM | +0.016 |
Neg logit contributions | |
weet | -0.021 |
orate | -0.016 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.015 |
Token | it |
Token ID | 340 |
Feature activation | +1.23e-02 |
Pos logit contributions | |
weet | +0.057 |
orate | +0.043 |
sex | +0.042 |
iculture | +0.042 |
antly | +0.042 |
Neg logit contributions | |
oot | -0.056 |
MFT | -0.054 |
タ | -0.050 |
\": | -0.050 |
LM | -0.048 |
Token | a |
Token ID | 257 |
Feature activation | +1.32e-02 |
Pos logit contributions | |
oot | +0.009 |
MFT | +0.009 |
タ | +0.008 |
\": | +0.008 |
LM | +0.008 |
Neg logit contributions | |
weet | -0.009 |
orate | -0.006 |
sex | -0.006 |
iculture | -0.006 |
antly | -0.006 |
Token | bitters |
Token ID | 48666 |
Feature activation | -7.54e-02 |
Pos logit contributions | |
oot | +0.009 |
MFT | +0.009 |
タ | +0.008 |
\": | +0.008 |
LM | +0.008 |
Neg logit contributions | |
weet | -0.010 |
orate | -0.008 |
sex | -0.008 |
antly | -0.008 |
iculture | -0.008 |
Token | weet |
Token ID | 7277 |
Feature activation | -5.53e-01 |
Pos logit contributions | |
weet | +0.000 |
orate | -0.014 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.015 |
Neg logit contributions | |
oot | -0.110 |
MFT | -0.107 |
タ | -0.104 |
\": | -0.104 |
LM | -0.102 |
Token | one |
Token ID | 530 |
Feature activation | -2.48e-02 |
Pos logit contributions | |
weet | +0.402 |
orate | +0.302 |
sex | +0.296 |
antly | +0.293 |
iculture | +0.291 |
Neg logit contributions | |
oot | -0.403 |
MFT | -0.385 |
タ | -0.364 |
\": | -0.361 |
LM | -0.345 |
Token | . |
Token ID | 13 |
Feature activation | +3.68e-03 |
Pos logit contributions | |
weet | +0.018 |
orate | +0.014 |
sex | +0.014 |
antly | +0.013 |
redistributed | +0.013 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.017 |
タ | -0.016 |
\": | -0.016 |
LM | -0.015 |
Token | A |
Token ID | 317 |
Feature activation | -4.86e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
iculture | -0.002 |
antly | -0.002 |
Token | bitters |
Token ID | 48666 |
Feature activation | -1.15e-01 |
Pos logit contributions | |
weet | +0.036 |
orate | +0.027 |
sex | +0.027 |
antly | +0.026 |
iculture | +0.026 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.033 |
タ | -0.031 |
\": | -0.031 |
LM | -0.030 |
Token | weet |
Token ID | 7277 |
Feature activation | -5.81e-01 |
Pos logit contributions | |
weet | +0.000 |
orate | -0.020 |
sex | -0.021 |
iculture | -0.022 |
antly | -0.022 |
Neg logit contributions | |
oot | -0.166 |
MFT | -0.162 |
タ | -0.158 |
\": | -0.157 |
LM | -0.154 |
Token | McCorm |
Token ID | 42037 |
Feature activation | -1.68e-01 |
Pos logit contributions | |
weet | +0.077 |
orate | +0.055 |
sex | +0.054 |
antly | +0.053 |
iculture | +0.052 |
Neg logit contributions | |
oot | -0.103 |
MFT | -0.099 |
タ | -0.094 |
\": | -0.093 |
LM | -0.090 |
Token | ack |
Token ID | 441 |
Feature activation | +3.24e-03 |
Pos logit contributions | |
weet | +0.142 |
orate | +0.111 |
sex | +0.110 |
antly | +0.110 |
iculture | +0.109 |
Neg logit contributions | |
oot | -0.102 |
MFT | -0.097 |
タ | -0.091 |
\": | -0.089 |
LM | -0.085 |
Token |
|
Token ID | 198 |
Feature activation | +3.13e-02 |
Pos logit contributions | |
oot | +0.002 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | +2.35e-02 |
Pos logit contributions | |
oot | +0.024 |
MFT | +0.023 |
タ | +0.021 |
\": | +0.021 |
LM | +0.020 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.015 |
Token | 2001 |
Token ID | 14585 |
Feature activation | -1.14e-01 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.013 |
antly | -0.013 |
iculture | -0.013 |
Token | Kel |
Token ID | 15150 |
Feature activation | -5.38e-01 |
Pos logit contributions | |
weet | +0.076 |
orate | +0.055 |
sex | +0.054 |
antly | +0.053 |
iculture | +0.053 |
Neg logit contributions | |
oot | -0.090 |
MFT | -0.086 |
タ | -0.082 |
\": | -0.081 |
LM | -0.078 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.17e-01 |
Pos logit contributions | |
weet | +0.346 |
sex | +0.248 |
orate | +0.247 |
antly | +0.239 |
iculture | +0.239 |
Neg logit contributions | |
oot | -0.436 |
MFT | -0.421 |
タ | -0.401 |
\": | -0.395 |
LM | -0.382 |
Token | Gram |
Token ID | 20159 |
Feature activation | -5.81e-03 |
Pos logit contributions | |
weet | +0.082 |
orate | +0.061 |
sex | +0.060 |
antly | +0.059 |
iculture | +0.059 |
Neg logit contributions | |
oot | -0.088 |
MFT | -0.084 |
タ | -0.079 |
\": | -0.078 |
LM | -0.075 |
Token | mer |
Token ID | 647 |
Feature activation | +1.22e-01 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.004 |
タ | -0.003 |
\": | -0.003 |
LM | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | +2.57e-02 |
Pos logit contributions | |
oot | +0.091 |
MFT | +0.086 |
タ | +0.082 |
\": | +0.081 |
LM | +0.078 |
Neg logit contributions | |
weet | -0.087 |
orate | -0.065 |
sex | -0.064 |
antly | -0.063 |
iculture | -0.063 |
Token |
|
Token ID | 198 |
Feature activation | +2.43e-02 |
Pos logit contributions | |
oot | +0.019 |
MFT | +0.019 |
タ | +0.018 |
\": | +0.017 |
LM | +0.017 |
Neg logit contributions | |
weet | -0.018 |
orate | -0.013 |
sex | -0.013 |
antly | -0.013 |
iculture | -0.013 |
Token | the |
Token ID | 262 |
Feature activation | -4.40e-04 |
Pos logit contributions | |
weet | +0.017 |
orate | +0.013 |
sex | +0.012 |
antly | +0.012 |
iculture | +0.012 |
Neg logit contributions | |
oot | -0.017 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.015 |
Token | series |
Token ID | 2168 |
Feature activation | +4.72e-02 |
Pos logit contributions | |
weet | +0.000 |
orate | +0.000 |
sex | +0.000 |
antly | +0.000 |
iculture | +0.000 |
Neg logit contributions | |
oot | -0.000 |
MFT | -0.000 |
タ | -0.000 |
\": | -0.000 |
LM | -0.000 |
Token | , |
Token ID | 11 |
Feature activation | -5.17e-03 |
Pos logit contributions | |
oot | +0.034 |
MFT | +0.032 |
タ | +0.030 |
\": | +0.030 |
LM | +0.029 |
Neg logit contributions | |
weet | -0.035 |
orate | -0.026 |
sex | -0.026 |
antly | -0.026 |
iculture | -0.026 |
Token | a |
Token ID | 257 |
Feature activation | -1.18e-02 |
Pos logit contributions | |
weet | +0.004 |
orate | +0.003 |
sex | +0.003 |
iculture | +0.003 |
antly | +0.003 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.004 |
タ | -0.003 |
\": | -0.003 |
LM | -0.003 |
Token | bitters |
Token ID | 48666 |
Feature activation | -8.23e-02 |
Pos logit contributions | |
weet | +0.009 |
orate | +0.007 |
sex | +0.006 |
antly | +0.006 |
iculture | +0.006 |
Neg logit contributions | |
oot | -0.009 |
MFT | -0.008 |
タ | -0.008 |
\": | -0.008 |
LM | -0.007 |
Token | weet |
Token ID | 7277 |
Feature activation | -5.38e-01 |
Pos logit contributions | |
weet | +0.000 |
orate | -0.015 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Neg logit contributions | |
oot | -0.120 |
MFT | -0.117 |
タ | -0.114 |
\": | -0.113 |
LM | -0.111 |
Token | counter |
Token ID | 3753 |
Feature activation | +2.32e-01 |
Pos logit contributions | |
weet | +0.390 |
orate | +0.291 |
sex | +0.287 |
antly | +0.284 |
iculture | +0.282 |
Neg logit contributions | |
oot | -0.394 |
MFT | -0.376 |
タ | -0.355 |
\": | -0.352 |
LM | -0.337 |
Token | point |
Token ID | 4122 |
Feature activation | -2.00e-01 |
Pos logit contributions | |
oot | +0.204 |
MFT | +0.195 |
タ | +0.186 |
\": | +0.184 |
LM | +0.179 |
Neg logit contributions | |
weet | -0.133 |
orate | -0.092 |
iculture | -0.089 |
sex | -0.089 |
antly | -0.089 |
Token | to |
Token ID | 284 |
Feature activation | -2.22e-02 |
Pos logit contributions | |
weet | +0.132 |
orate | +0.096 |
sex | +0.094 |
antly | +0.093 |
iculture | +0.092 |
Neg logit contributions | |
oot | -0.159 |
MFT | -0.152 |
タ | -0.144 |
\": | -0.143 |
LM | -0.138 |
Token | the |
Token ID | 262 |
Feature activation | +1.75e-02 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
antly | +0.012 |
iculture | +0.012 |
Neg logit contributions | |
oot | -0.016 |
MFT | -0.015 |
タ | -0.014 |
\": | -0.014 |
LM | -0.014 |
Token | two |
Token ID | 734 |
Feature activation | -5.24e-02 |
Pos logit contributions | |
oot | +0.013 |
MFT | +0.012 |
タ | +0.012 |
\": | +0.011 |
LM | +0.011 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.009 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | those |
Token ID | 883 |
Feature activation | +2.67e-02 |
Pos logit contributions | |
weet | +0.008 |
orate | +0.006 |
sex | +0.006 |
antly | +0.006 |
iculture | +0.006 |
Neg logit contributions | |
oot | -0.008 |
MFT | -0.008 |
タ | -0.007 |
\": | -0.007 |
LM | -0.007 |
Token | who |
Token ID | 508 |
Feature activation | +4.42e-03 |
Pos logit contributions | |
oot | +0.020 |
MFT | +0.019 |
タ | +0.018 |
\": | +0.018 |
LM | +0.017 |
Neg logit contributions | |
weet | -0.019 |
orate | -0.014 |
sex | -0.014 |
antly | -0.014 |
redistributed | -0.014 |
Token | identify |
Token ID | 5911 |
Feature activation | +4.41e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.004 |
orate | -0.003 |
sex | -0.003 |
antly | -0.003 |
iculture | -0.003 |
Token | as |
Token ID | 355 |
Feature activation | -1.37e-02 |
Pos logit contributions | |
oot | +0.030 |
MFT | +0.028 |
タ | +0.027 |
\": | +0.026 |
LM | +0.025 |
Neg logit contributions | |
weet | -0.034 |
orate | -0.026 |
sex | -0.026 |
iculture | -0.025 |
antly | -0.025 |
Token | inter |
Token ID | 987 |
Feature activation | +6.28e-02 |
Pos logit contributions | |
weet | +0.008 |
orate | +0.006 |
sex | +0.006 |
antly | +0.006 |
iculture | +0.006 |
Neg logit contributions | |
oot | -0.012 |
MFT | -0.011 |
タ | -0.011 |
\": | -0.010 |
LM | -0.010 |
Token | sex |
Token ID | 8044 |
Feature activation | -5.30e-01 |
Pos logit contributions | |
oot | +0.078 |
MFT | +0.076 |
タ | +0.073 |
\": | +0.073 |
LM | +0.071 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.002 |
iculture | -0.001 |
antly | -0.001 |
redistributed | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -3.06e-02 |
Pos logit contributions | |
weet | +0.383 |
orate | +0.287 |
sex | +0.286 |
antly | +0.281 |
iculture | +0.278 |
Neg logit contributions | |
oot | -0.386 |
MFT | -0.368 |
タ | -0.349 |
\": | -0.346 |
LM | -0.331 |
Token | third |
Token ID | 2368 |
Feature activation | -2.03e-01 |
Pos logit contributions | |
weet | +0.023 |
orate | +0.017 |
sex | +0.017 |
antly | +0.017 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.022 |
MFT | -0.021 |
タ | -0.020 |
\": | -0.020 |
LM | -0.019 |
Token | - |
Token ID | 12 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
weet | +0.126 |
sex | +0.091 |
orate | +0.090 |
antly | +0.086 |
iculture | +0.086 |
Neg logit contributions | |
oot | -0.169 |
MFT | -0.162 |
タ | -0.154 |
\": | -0.154 |
LR | -0.148 |
Token | gender |
Token ID | 8388 |
Feature activation | -9.75e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.003 |
sex | +0.003 |
antly | +0.003 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.010 |
タ | -0.009 |
\": | -0.009 |
LM | -0.009 |
Token | , |
Token ID | 11 |
Feature activation | -8.59e-03 |
Pos logit contributions | |
weet | +0.069 |
orate | +0.051 |
sex | +0.051 |
antly | +0.050 |
iculture | +0.049 |
Neg logit contributions | |
oot | -0.073 |
MFT | -0.070 |
タ | -0.066 |
\": | -0.065 |
heim | -0.063 |
Token | the |
Token ID | 262 |
Feature activation | +5.01e-03 |
Pos logit contributions | |
weet | +0.009 |
orate | +0.007 |
sex | +0.007 |
antly | +0.007 |
iculture | +0.007 |
Neg logit contributions | |
oot | -0.008 |
MFT | -0.008 |
タ | -0.007 |
\": | -0.007 |
LM | -0.007 |
Token | successes |
Token ID | 23682 |
Feature activation | +3.04e-02 |
Pos logit contributions | |
oot | +0.004 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.003 |
LM | +0.003 |
Neg logit contributions | |
weet | -0.004 |
orate | -0.003 |
sex | -0.003 |
antly | -0.003 |
iculture | -0.003 |
Token | of |
Token ID | 286 |
Feature activation | +2.29e-02 |
Pos logit contributions | |
oot | +0.022 |
MFT | +0.021 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.023 |
orate | -0.017 |
sex | -0.017 |
antly | -0.017 |
iculture | -0.017 |
Token | comparative |
Token ID | 29270 |
Feature activation | -1.36e-01 |
Pos logit contributions | |
oot | +0.016 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.014 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.013 |
iculture | -0.013 |
antly | -0.013 |
Token | lingu |
Token ID | 20280 |
Feature activation | +3.35e-02 |
Pos logit contributions | |
weet | +0.101 |
orate | +0.077 |
sex | +0.075 |
antly | +0.074 |
iculture | +0.074 |
Neg logit contributions | |
oot | -0.097 |
MFT | -0.092 |
タ | -0.086 |
\": | -0.086 |
LM | -0.083 |
Token | istics |
Token ID | 3969 |
Feature activation | -5.25e-01 |
Pos logit contributions | |
oot | +0.039 |
MFT | +0.038 |
タ | +0.037 |
\": | +0.037 |
LM | +0.036 |
Neg logit contributions | |
weet | -0.009 |
orate | -0.003 |
sex | -0.003 |
antly | -0.003 |
redistributed | -0.003 |
Token | , |
Token ID | 11 |
Feature activation | -4.31e-02 |
Pos logit contributions | |
weet | +0.374 |
orate | +0.280 |
sex | +0.276 |
antly | +0.272 |
iculture | +0.271 |
Neg logit contributions | |
oot | -0.388 |
MFT | -0.371 |
タ | -0.350 |
\": | -0.347 |
LM | -0.333 |
Token | it |
Token ID | 340 |
Feature activation | -1.55e-02 |
Pos logit contributions | |
weet | +0.032 |
orate | +0.025 |
sex | +0.024 |
antly | +0.024 |
iculture | +0.024 |
Neg logit contributions | |
oot | -0.030 |
MFT | -0.029 |
タ | -0.027 |
\": | -0.027 |
LM | -0.026 |
Token | had |
Token ID | 550 |
Feature activation | +3.23e-02 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.009 |
sex | +0.008 |
antly | +0.008 |
redistributed | +0.008 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | suddenly |
Token ID | 6451 |
Feature activation | -4.16e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.026 |
orate | -0.020 |
sex | -0.020 |
antly | -0.020 |
redistributed | -0.020 |
Token | become |
Token ID | 1716 |
Feature activation | +1.87e-01 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.031 |
sex | +0.030 |
redistributed | +0.030 |
antly | +0.030 |
Neg logit contributions | |
oot | -0.022 |
MFT | -0.021 |
タ | -0.019 |
\": | -0.019 |
LM | -0.018 |
Token | Rom |
Token ID | 3570 |
Feature activation | -6.41e-02 |
Pos logit contributions | |
weet | +0.018 |
orate | +0.013 |
sex | +0.013 |
antly | +0.012 |
iculture | +0.012 |
Neg logit contributions | |
oot | -0.020 |
MFT | -0.019 |
タ | -0.018 |
\": | -0.018 |
LM | -0.018 |
Token | ano |
Token ID | 5733 |
Feature activation | +6.93e-03 |
Pos logit contributions | |
weet | +0.043 |
orate | +0.031 |
sex | +0.030 |
antly | +0.030 |
iculture | +0.030 |
Neg logit contributions | |
oot | -0.051 |
MFT | -0.049 |
タ | -0.046 |
\": | -0.046 |
LM | -0.044 |
Token |
|
Token ID | 198 |
Feature activation | +5.53e-02 |
Pos logit contributions | |
oot | +0.005 |
MFT | +0.005 |
タ | +0.005 |
\": | +0.005 |
LM | +0.004 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
iculture | -0.004 |
Token |
|
Token ID | 198 |
Feature activation | +4.11e-02 |
Pos logit contributions | |
oot | +0.042 |
MFT | +0.040 |
タ | +0.038 |
\": | +0.037 |
LM | +0.036 |
Neg logit contributions | |
weet | -0.039 |
orate | -0.029 |
sex | -0.028 |
antly | -0.028 |
iculture | -0.028 |
Token | 2002 |
Token ID | 16942 |
Feature activation | -4.59e-02 |
Pos logit contributions | |
oot | +0.026 |
MFT | +0.025 |
タ | +0.023 |
\": | +0.023 |
LM | +0.022 |
Neg logit contributions | |
weet | -0.033 |
orate | -0.026 |
sex | -0.026 |
antly | -0.025 |
iculture | -0.025 |
Token | Kel |
Token ID | 15150 |
Feature activation | -5.20e-01 |
Pos logit contributions | |
weet | +0.032 |
orate | +0.024 |
sex | +0.023 |
antly | +0.023 |
iculture | +0.023 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.033 |
タ | -0.032 |
\": | -0.031 |
LM | -0.030 |
Token | sey |
Token ID | 4397 |
Feature activation | -8.44e-02 |
Pos logit contributions | |
weet | +0.335 |
sex | +0.240 |
orate | +0.239 |
antly | +0.232 |
iculture | +0.231 |
Neg logit contributions | |
oot | -0.421 |
MFT | -0.406 |
タ | -0.387 |
\": | -0.381 |
LM | -0.368 |
Token | Gram |
Token ID | 20159 |
Feature activation | +9.83e-04 |
Pos logit contributions | |
weet | +0.059 |
orate | +0.044 |
sex | +0.043 |
antly | +0.043 |
iculture | +0.043 |
Neg logit contributions | |
oot | -0.064 |
MFT | -0.061 |
タ | -0.057 |
\": | -0.057 |
LM | -0.055 |
Token | mer |
Token ID | 647 |
Feature activation | +1.38e-01 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.001 |
orate | -0.001 |
sex | -0.001 |
antly | -0.001 |
iculture | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +4.65e-02 |
Pos logit contributions | |
oot | +0.102 |
MFT | +0.098 |
タ | +0.092 |
\": | +0.091 |
LM | +0.088 |
Neg logit contributions | |
weet | -0.099 |
orate | -0.074 |
sex | -0.073 |
antly | -0.072 |
iculture | -0.072 |
Token |
|
Token ID | 198 |
Feature activation | +2.96e-02 |
Pos logit contributions | |
oot | +0.035 |
MFT | +0.034 |
タ | +0.032 |
\": | +0.031 |
LM | +0.030 |
Neg logit contributions | |
weet | -0.032 |
orate | -0.024 |
sex | -0.024 |
antly | -0.023 |
iculture | -0.023 |
Token | . |
Token ID | 13 |
Feature activation | +2.20e-02 |
Pos logit contributions | |
weet | +0.040 |
orate | +0.031 |
sex | +0.030 |
iculture | +0.030 |
antly | +0.030 |
Neg logit contributions | |
oot | -0.038 |
MFT | -0.036 |
タ | -0.034 |
\": | -0.033 |
LM | -0.032 |
Token | Fox |
Token ID | 5426 |
Feature activation | -7.63e-02 |
Pos logit contributions | |
oot | +0.020 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.012 |
orate | -0.008 |
iculture | -0.008 |
redistributed | -0.007 |
sex | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | +5.81e-02 |
Pos logit contributions | |
weet | +0.054 |
orate | +0.040 |
sex | +0.040 |
antly | +0.039 |
iculture | +0.039 |
Neg logit contributions | |
oot | -0.057 |
MFT | -0.054 |
タ | -0.051 |
\": | -0.051 |
LM | -0.049 |
Token |
|
Token ID | 198 |
Feature activation | +3.98e-02 |
Pos logit contributions | |
oot | +0.044 |
MFT | +0.042 |
タ | +0.040 |
\": | +0.039 |
LM | +0.038 |
Neg logit contributions | |
weet | -0.040 |
orate | -0.030 |
sex | -0.030 |
antly | -0.029 |
iculture | -0.029 |
Token | 2000 |
Token ID | 11024 |
Feature activation | -7.58e-02 |
Pos logit contributions | |
oot | +0.026 |
MFT | +0.025 |
タ | +0.023 |
\": | +0.023 |
LM | +0.022 |
Neg logit contributions | |
weet | -0.032 |
orate | -0.025 |
sex | -0.024 |
antly | -0.024 |
iculture | -0.024 |
Token | Kel |
Token ID | 15150 |
Feature activation | -5.16e-01 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.025 |
sex | +0.024 |
iculture | +0.023 |
antly | +0.023 |
Neg logit contributions | |
oot | -0.072 |
MFT | -0.069 |
タ | -0.066 |
\": | -0.066 |
LM | -0.064 |
Token | sey |
Token ID | 4397 |
Feature activation | -9.74e-02 |
Pos logit contributions | |
weet | +0.332 |
sex | +0.238 |
orate | +0.238 |
antly | +0.231 |
iculture | +0.230 |
Neg logit contributions | |
oot | -0.417 |
MFT | -0.402 |
タ | -0.383 |
\": | -0.377 |
LM | -0.365 |
Token | Gram |
Token ID | 20159 |
Feature activation | -2.63e-02 |
Pos logit contributions | |
weet | +0.068 |
orate | +0.051 |
sex | +0.050 |
antly | +0.049 |
iculture | +0.049 |
Neg logit contributions | |
oot | -0.073 |
MFT | -0.070 |
タ | -0.066 |
\": | -0.065 |
LM | -0.063 |
Token | mer |
Token ID | 647 |
Feature activation | +1.30e-01 |
Pos logit contributions | |
weet | +0.021 |
orate | +0.016 |
sex | +0.016 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.017 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.014 |
Token |
|
Token ID | 198 |
Feature activation | +2.90e-02 |
Pos logit contributions | |
oot | +0.097 |
MFT | +0.092 |
タ | +0.087 |
\": | +0.086 |
LM | +0.083 |
Neg logit contributions | |
weet | -0.093 |
orate | -0.069 |
sex | -0.068 |
antly | -0.067 |
iculture | -0.067 |
Token |
|
Token ID | 198 |
Feature activation | +1.95e-02 |
Pos logit contributions | |
oot | +0.022 |
MFT | +0.021 |
タ | +0.020 |
\": | +0.020 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.020 |
orate | -0.015 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.014 |
Token | a |
Token ID | 257 |
Feature activation | -5.52e-03 |
Pos logit contributions | |
weet | +0.013 |
orate | +0.010 |
sex | +0.010 |
antly | +0.010 |
iculture | +0.010 |
Neg logit contributions | |
oot | -0.013 |
MFT | -0.013 |
タ | -0.012 |
\": | -0.012 |
heim | -0.011 |
Token | doctoral |
Token ID | 40995 |
Feature activation | +6.58e-02 |
Pos logit contributions | |
weet | +0.004 |
orate | +0.003 |
sex | +0.003 |
antly | +0.003 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.004 |
タ | -0.004 |
\": | -0.004 |
LM | -0.003 |
Token | candidate |
Token ID | 4540 |
Feature activation | +1.48e-01 |
Pos logit contributions | |
oot | +0.040 |
MFT | +0.037 |
タ | +0.035 |
\": | +0.035 |
heim | +0.033 |
Neg logit contributions | |
weet | -0.056 |
orate | -0.044 |
sex | -0.044 |
antly | -0.043 |
iculture | -0.043 |
Token | in |
Token ID | 287 |
Feature activation | -2.99e-02 |
Pos logit contributions | |
oot | +0.111 |
MFT | +0.106 |
タ | +0.101 |
\": | +0.100 |
LM | +0.096 |
Neg logit contributions | |
weet | -0.104 |
orate | -0.077 |
sex | -0.076 |
antly | -0.076 |
iculture | -0.075 |
Token | lingu |
Token ID | 20280 |
Feature activation | +3.79e-02 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.016 |
sex | +0.016 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.022 |
MFT | -0.021 |
タ | -0.020 |
\": | -0.019 |
LM | -0.019 |
Token | istics |
Token ID | 3969 |
Feature activation | -5.03e-01 |
Pos logit contributions | |
oot | +0.045 |
MFT | +0.043 |
タ | +0.042 |
\": | +0.042 |
LM | +0.041 |
Neg logit contributions | |
weet | -0.010 |
orate | -0.004 |
sex | -0.003 |
antly | -0.003 |
redistributed | -0.003 |
Token | at |
Token ID | 379 |
Feature activation | -6.15e-02 |
Pos logit contributions | |
weet | +0.339 |
orate | +0.246 |
sex | +0.244 |
antly | +0.242 |
iculture | +0.240 |
Neg logit contributions | |
oot | -0.393 |
MFT | -0.374 |
タ | -0.356 |
\": | -0.353 |
heim | -0.339 |
Token | Penn |
Token ID | 6595 |
Feature activation | -1.16e-01 |
Pos logit contributions | |
weet | +0.045 |
orate | +0.034 |
sex | +0.033 |
antly | +0.033 |
Rak | +0.033 |
Neg logit contributions | |
oot | -0.045 |
MFT | -0.042 |
タ | -0.040 |
\": | -0.040 |
heim | -0.038 |
Token | , |
Token ID | 11 |
Feature activation | -2.96e-02 |
Pos logit contributions | |
weet | +0.099 |
orate | +0.078 |
sex | +0.078 |
antly | +0.077 |
iculture | +0.077 |
Neg logit contributions | |
oot | -0.069 |
MFT | -0.065 |
タ | -0.060 |
\": | -0.060 |
LM | -0.056 |
Token | notes |
Token ID | 4710 |
Feature activation | -2.78e-01 |
Pos logit contributions | |
weet | +0.021 |
orate | +0.015 |
sex | +0.015 |
antly | +0.015 |
iculture | +0.015 |
Neg logit contributions | |
oot | -0.022 |
MFT | -0.021 |
タ | -0.020 |
\": | -0.020 |
LM | -0.019 |
Token | that |
Token ID | 326 |
Feature activation | +1.84e-04 |
Pos logit contributions | |
weet | +0.214 |
orate | +0.163 |
sex | +0.161 |
antly | +0.160 |
iculture | +0.158 |
Neg logit contributions | |
oot | -0.191 |
MFT | -0.181 |
タ | -0.170 |
\": | -0.168 |
LM | -0.161 |
Token | please |
Token ID | 3387 |
Feature activation | -2.49e-02 |
Pos logit contributions | |
weet | +0.059 |
orate | +0.044 |
sex | +0.044 |
antly | +0.043 |
iculture | +0.043 |
Neg logit contributions | |
oot | -0.058 |
MFT | -0.056 |
タ | -0.052 |
\": | -0.052 |
LM | -0.050 |
Token | visit |
Token ID | 3187 |
Feature activation | +4.04e-02 |
Pos logit contributions | |
weet | +0.018 |
orate | +0.013 |
sex | +0.013 |
antly | +0.013 |
iculture | +0.013 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.018 |
タ | -0.017 |
\": | -0.016 |
LM | -0.016 |
Token | my |
Token ID | 616 |
Feature activation | -2.21e-02 |
Pos logit contributions | |
oot | +0.028 |
MFT | +0.026 |
タ | +0.025 |
\": | +0.024 |
LM | +0.023 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.024 |
sex | -0.023 |
antly | -0.023 |
iculture | -0.023 |
Token | h |
Token ID | 289 |
Feature activation | -4.03e-02 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
iculture | +0.011 |
antly | +0.011 |
Neg logit contributions | |
oot | -0.016 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.014 |
Token | ort |
Token ID | 419 |
Feature activation | -9.95e-02 |
Pos logit contributions | |
weet | +0.031 |
orate | +0.024 |
sex | +0.024 |
iculture | +0.023 |
antly | +0.023 |
Neg logit contributions | |
oot | -0.027 |
MFT | -0.026 |
タ | -0.024 |
\": | -0.024 |
LM | -0.023 |
Token | iculture |
Token ID | 47428 |
Feature activation | -5.03e-01 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.009 |
sex | +0.008 |
antly | +0.007 |
redistributed | +0.007 |
Neg logit contributions | |
oot | -0.118 |
MFT | -0.114 |
タ | -0.110 |
\": | -0.110 |
heim | -0.108 |
Token | and |
Token ID | 290 |
Feature activation | -2.52e-02 |
Pos logit contributions | |
weet | +0.371 |
orate | +0.279 |
sex | +0.275 |
iculture | +0.273 |
antly | +0.272 |
Neg logit contributions | |
oot | -0.361 |
MFT | -0.343 |
タ | -0.324 |
\": | -0.321 |
LM | -0.308 |
Token | Land |
Token ID | 6379 |
Feature activation | -1.59e-02 |
Pos logit contributions | |
weet | +0.019 |
orate | +0.014 |
sex | +0.014 |
antly | +0.014 |
iculture | +0.014 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.017 |
タ | -0.016 |
\": | -0.016 |
LM | -0.015 |
Token | sc |
Token ID | 1416 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
weet | +0.012 |
orate | +0.009 |
sex | +0.009 |
iculture | +0.008 |
antly | +0.008 |
Neg logit contributions | |
oot | -0.012 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | aping |
Token ID | 9269 |
Feature activation | -1.15e-01 |
Pos logit contributions | |
oot | +0.112 |
MFT | +0.108 |
タ | +0.103 |
\": | +0.102 |
LM | +0.100 |
Neg logit contributions | |
weet | -0.053 |
orate | -0.033 |
sex | -0.032 |
antly | -0.031 |
iculture | -0.031 |
Token | blog |
Token ID | 4130 |
Feature activation | -5.01e-02 |
Pos logit contributions | |
weet | +0.086 |
orate | +0.065 |
sex | +0.064 |
iculture | +0.064 |
antly | +0.063 |
Neg logit contributions | |
oot | -0.082 |
MFT | -0.078 |
タ | -0.073 |
\": | -0.073 |
LM | -0.070 |
Token | . |
Token ID | 13 |
Feature activation | -2.36e-02 |
Pos logit contributions | |
oot | +0.070 |
MFT | +0.067 |
タ | +0.063 |
\": | +0.063 |
LM | +0.060 |
Neg logit contributions | |
weet | -0.071 |
orate | -0.054 |
sex | -0.053 |
antly | -0.052 |
iculture | -0.052 |
Token | Later |
Token ID | 11450 |
Feature activation | +1.32e-01 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
antly | +0.011 |
iculture | +0.011 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.017 |
タ | -0.016 |
\": | -0.016 |
LM | -0.016 |
Token | , |
Token ID | 11 |
Feature activation | +2.06e-02 |
Pos logit contributions | |
oot | +0.096 |
MFT | +0.092 |
タ | +0.087 |
\": | +0.086 |
LM | +0.083 |
Neg logit contributions | |
weet | -0.095 |
orate | -0.072 |
sex | -0.071 |
antly | -0.070 |
iculture | -0.070 |
Token | the |
Token ID | 262 |
Feature activation | +2.45e-02 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.014 |
タ | +0.013 |
\": | +0.013 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.011 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | collect |
Token ID | 2824 |
Feature activation | -4.46e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.015 |
Neg logit contributions | |
weet | -0.018 |
orate | -0.014 |
sex | -0.014 |
antly | -0.014 |
iculture | -0.014 |
Token | orate |
Token ID | 16262 |
Feature activation | -5.00e-01 |
Pos logit contributions | |
weet | +0.017 |
orate | +0.009 |
sex | +0.008 |
antly | +0.008 |
iculture | +0.008 |
Neg logit contributions | |
oot | -0.048 |
MFT | -0.046 |
タ | -0.045 |
\": | -0.044 |
LM | -0.043 |
Token | directed |
Token ID | 7924 |
Feature activation | -3.60e-02 |
Pos logit contributions | |
weet | +0.357 |
orate | +0.268 |
sex | +0.263 |
antly | +0.260 |
redistributed | +0.259 |
Neg logit contributions | |
oot | -0.370 |
MFT | -0.352 |
タ | -0.333 |
\": | -0.331 |
LM | -0.316 |
Token | the |
Token ID | 262 |
Feature activation | -4.51e-03 |
Pos logit contributions | |
weet | +0.027 |
orate | +0.021 |
sex | +0.020 |
antly | +0.020 |
iculture | +0.020 |
Neg logit contributions | |
oot | -0.025 |
MFT | -0.024 |
タ | -0.023 |
\": | -0.023 |
heim | -0.022 |
Token | state |
Token ID | 1181 |
Feature activation | +1.42e-03 |
Pos logit contributions | |
weet | +0.003 |
orate | +0.003 |
sex | +0.003 |
antly | +0.003 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.003 |
MFT | -0.003 |
タ | -0.003 |
\": | -0.003 |
LM | -0.003 |
Token | revenue |
Token ID | 6426 |
Feature activation | +2.35e-02 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.001 |
orate | -0.001 |
sex | -0.001 |
antly | -0.001 |
iculture | -0.001 |
Token | department |
Token ID | 5011 |
Feature activation | +3.42e-02 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.014 |
タ | +0.013 |
\": | +0.013 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.019 |
orate | -0.015 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.015 |
Token | Garry |
Token ID | 45354 |
Feature activation | +1.23e-01 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.017 |
sex | +0.017 |
antly | +0.017 |
iculture | +0.017 |
Neg logit contributions | |
oot | -0.017 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.014 |
LM | -0.014 |
Token | Sh |
Token ID | 911 |
Feature activation | +6.11e-02 |
Pos logit contributions | |
oot | +0.084 |
MFT | +0.080 |
タ | +0.075 |
\": | +0.074 |
LM | +0.071 |
Neg logit contributions | |
weet | -0.094 |
orate | -0.072 |
sex | -0.071 |
antly | -0.070 |
iculture | -0.070 |
Token | and |
Token ID | 392 |
Feature activation | -6.74e-02 |
Pos logit contributions | |
oot | +0.047 |
MFT | +0.046 |
タ | +0.043 |
\": | +0.043 |
LM | +0.041 |
Neg logit contributions | |
weet | -0.041 |
orate | -0.030 |
sex | -0.030 |
antly | -0.029 |
iculture | -0.029 |
Token | ling |
Token ID | 1359 |
Feature activation | -8.58e-02 |
Pos logit contributions | |
weet | +0.046 |
orate | +0.034 |
sex | +0.033 |
iculture | +0.033 |
antly | +0.033 |
Neg logit contributions | |
oot | -0.052 |
MFT | -0.050 |
タ | -0.047 |
\": | -0.047 |
LM | -0.045 |
Token | 1998 |
Token ID | 7795 |
Feature activation | -1.31e-03 |
Pos logit contributions | |
weet | +0.070 |
orate | +0.055 |
sex | +0.054 |
iculture | +0.053 |
antly | +0.053 |
Neg logit contributions | |
oot | -0.055 |
MFT | -0.052 |
タ | -0.048 |
\": | -0.048 |
LM | -0.046 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.96e-01 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token | sey |
Token ID | 4397 |
Feature activation | -8.44e-02 |
Pos logit contributions | |
weet | +0.320 |
orate | +0.230 |
sex | +0.229 |
antly | +0.222 |
iculture | +0.222 |
Neg logit contributions | |
oot | -0.400 |
MFT | -0.385 |
タ | -0.367 |
\": | -0.362 |
LM | -0.350 |
Token | Gram |
Token ID | 20159 |
Feature activation | -6.49e-03 |
Pos logit contributions | |
weet | +0.059 |
orate | +0.044 |
sex | +0.043 |
antly | +0.043 |
iculture | +0.043 |
Neg logit contributions | |
oot | -0.064 |
MFT | -0.061 |
タ | -0.057 |
\": | -0.057 |
LM | -0.055 |
Token | mer |
Token ID | 647 |
Feature activation | +1.39e-01 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.004 |
タ | -0.004 |
\": | -0.004 |
LM | -0.004 |
Token |
|
Token ID | 198 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
oot | +0.103 |
MFT | +0.098 |
タ | +0.093 |
\": | +0.092 |
LM | +0.088 |
Neg logit contributions | |
weet | -0.099 |
orate | -0.074 |
sex | -0.073 |
antly | -0.072 |
iculture | -0.072 |
Token |
|
Token ID | 198 |
Feature activation | +7.40e-03 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.012 |
タ | +0.011 |
\": | +0.011 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.008 |
sex | -0.008 |
antly | -0.008 |
iculture | -0.008 |
Token | 2003 |
Token ID | 16088 |
Feature activation | -3.15e-02 |
Pos logit contributions | |
oot | +0.016 |
MFT | +0.016 |
タ | +0.014 |
\": | +0.014 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.019 |
orate | -0.015 |
sex | -0.015 |
antly | -0.015 |
iculture | -0.015 |
Token | Ray |
Token ID | 7760 |
Feature activation | -4.10e-02 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.017 |
sex | +0.016 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.021 |
\": | -0.021 |
LM | -0.020 |
Token | Rom |
Token ID | 3570 |
Feature activation | -5.80e-02 |
Pos logit contributions | |
weet | +0.028 |
orate | +0.020 |
sex | +0.020 |
antly | +0.020 |
iculture | +0.020 |
Neg logit contributions | |
oot | -0.032 |
MFT | -0.031 |
タ | -0.029 |
\": | -0.029 |
LM | -0.028 |
Token | ano |
Token ID | 5733 |
Feature activation | -1.67e-02 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.028 |
sex | +0.027 |
antly | +0.027 |
iculture | +0.027 |
Neg logit contributions | |
oot | -0.046 |
MFT | -0.044 |
タ | -0.042 |
\": | -0.041 |
LM | -0.040 |
Token | 2004 |
Token ID | 5472 |
Feature activation | +3.88e-02 |
Pos logit contributions | |
weet | +0.013 |
orate | +0.010 |
sex | +0.010 |
antly | +0.010 |
iculture | +0.010 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
oot | +0.029 |
MFT | +0.028 |
タ | +0.026 |
\": | +0.026 |
LM | +0.025 |
Neg logit contributions | |
weet | -0.027 |
orate | -0.020 |
sex | -0.020 |
antly | -0.020 |
iculture | -0.020 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.04e-01 |
Pos logit contributions | |
weet | +0.315 |
sex | +0.226 |
orate | +0.225 |
antly | +0.218 |
iculture | +0.217 |
Neg logit contributions | |
oot | -0.398 |
MFT | -0.384 |
タ | -0.367 |
\": | -0.362 |
LM | -0.349 |
Token | Gram |
Token ID | 20159 |
Feature activation | +3.81e-02 |
Pos logit contributions | |
weet | +0.072 |
orate | +0.053 |
sex | +0.053 |
antly | +0.052 |
iculture | +0.052 |
Neg logit contributions | |
oot | -0.079 |
MFT | -0.075 |
タ | -0.071 |
\": | -0.070 |
LM | -0.067 |
Token | mer |
Token ID | 647 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
oot | +0.025 |
MFT | +0.023 |
タ | +0.022 |
\": | +0.022 |
LM | +0.021 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.024 |
sex | -0.023 |
antly | -0.023 |
iculture | -0.023 |
Token |
|
Token ID | 198 |
Feature activation | +4.41e-02 |
Pos logit contributions | |
oot | +0.081 |
MFT | +0.078 |
タ | +0.073 |
\": | +0.073 |
LM | +0.070 |
Neg logit contributions | |
weet | -0.079 |
orate | -0.059 |
sex | -0.058 |
antly | -0.057 |
iculture | -0.057 |
Token |
|
Token ID | 198 |
Feature activation | +3.12e-02 |
Pos logit contributions | |
oot | +0.033 |
MFT | +0.032 |
タ | +0.030 |
\": | +0.030 |
LM | +0.029 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.023 |
sex | -0.023 |
antly | -0.022 |
iculture | -0.022 |
Token | . |
Token ID | 13 |
Feature activation | -8.97e-04 |
Pos logit contributions | |
weet | +0.041 |
orate | +0.031 |
sex | +0.031 |
iculture | +0.031 |
redistributed | +0.030 |
Neg logit contributions | |
oot | -0.039 |
MFT | -0.037 |
タ | -0.034 |
\": | -0.034 |
LM | -0.033 |
Token | Fox |
Token ID | 5426 |
Feature activation | -7.33e-02 |
Pos logit contributions | |
weet | +0.000 |
orate | +0.000 |
iculture | +0.000 |
redistributed | +0.000 |
sex | +0.000 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +4.84e-02 |
Pos logit contributions | |
weet | +0.052 |
orate | +0.039 |
sex | +0.038 |
antly | +0.038 |
iculture | +0.038 |
Neg logit contributions | |
oot | -0.054 |
MFT | -0.052 |
タ | -0.049 |
\": | -0.049 |
LM | -0.047 |
Token |
|
Token ID | 198 |
Feature activation | +2.29e-02 |
Pos logit contributions | |
oot | +0.037 |
MFT | +0.035 |
タ | +0.033 |
\": | +0.033 |
LM | +0.032 |
Neg logit contributions | |
weet | -0.034 |
orate | -0.025 |
sex | -0.025 |
antly | -0.024 |
iculture | -0.024 |
Token | 1999 |
Token ID | 18946 |
Feature activation | -1.71e-01 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.017 |
タ | +0.016 |
\": | +0.016 |
LM | +0.015 |
Neg logit contributions | |
weet | -0.016 |
orate | -0.012 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.91e-01 |
Pos logit contributions | |
weet | +0.102 |
orate | +0.071 |
sex | +0.070 |
antly | +0.069 |
iculture | +0.069 |
Neg logit contributions | |
oot | -0.147 |
MFT | -0.141 |
タ | -0.134 |
\": | -0.133 |
LM | -0.129 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.14e-01 |
Pos logit contributions | |
weet | +0.317 |
sex | +0.227 |
orate | +0.227 |
antly | +0.220 |
iculture | +0.219 |
Neg logit contributions | |
oot | -0.397 |
MFT | -0.382 |
タ | -0.364 |
\": | -0.359 |
LM | -0.347 |
Token | Gram |
Token ID | 20159 |
Feature activation | -2.09e-02 |
Pos logit contributions | |
weet | +0.080 |
orate | +0.059 |
sex | +0.059 |
antly | +0.058 |
iculture | +0.058 |
Neg logit contributions | |
oot | -0.085 |
MFT | -0.081 |
タ | -0.077 |
\": | -0.076 |
LM | -0.073 |
Token | mer |
Token ID | 647 |
Feature activation | +1.26e-01 |
Pos logit contributions | |
weet | +0.017 |
orate | +0.013 |
sex | +0.013 |
antly | +0.013 |
iculture | +0.013 |
Neg logit contributions | |
oot | -0.014 |
MFT | -0.013 |
タ | -0.012 |
\": | -0.012 |
LM | -0.011 |
Token |
|
Token ID | 198 |
Feature activation | +1.79e-02 |
Pos logit contributions | |
oot | +0.093 |
MFT | +0.089 |
タ | +0.084 |
\": | +0.083 |
LM | +0.080 |
Neg logit contributions | |
weet | -0.089 |
orate | -0.067 |
sex | -0.066 |
antly | -0.065 |
iculture | -0.065 |
Token |
|
Token ID | 198 |
Feature activation | +1.22e-02 |
Pos logit contributions | |
oot | +0.013 |
MFT | +0.013 |
タ | +0.012 |
\": | +0.012 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.009 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | John |
Token ID | 1757 |
Feature activation | -1.02e-01 |
Pos logit contributions | |
weet | +0.032 |
orate | +0.024 |
sex | +0.023 |
antly | +0.023 |
iculture | +0.023 |
Neg logit contributions | |
oot | -0.036 |
MFT | -0.035 |
タ | -0.033 |
\": | -0.032 |
heim | -0.031 |
Token | Goodman |
Token ID | 33318 |
Feature activation | -2.34e-01 |
Pos logit contributions | |
weet | +0.061 |
orate | +0.043 |
sex | +0.042 |
antly | +0.041 |
iculture | +0.041 |
Neg logit contributions | |
oot | -0.087 |
MFT | -0.083 |
タ | -0.079 |
\": | -0.079 |
LM | -0.076 |
Token |
|
Token ID | 198 |
Feature activation | -5.25e-02 |
Pos logit contributions | |
weet | +0.170 |
orate | +0.128 |
sex | +0.126 |
iculture | +0.125 |
antly | +0.124 |
Neg logit contributions | |
oot | -0.170 |
MFT | -0.162 |
タ | -0.152 |
\": | -0.151 |
LM | -0.146 |
Token |
|
Token ID | 198 |
Feature activation | -5.88e-02 |
Pos logit contributions | |
weet | +0.036 |
orate | +0.027 |
sex | +0.027 |
antly | +0.026 |
iculture | +0.026 |
Neg logit contributions | |
oot | -0.040 |
MFT | -0.038 |
タ | -0.036 |
\": | -0.035 |
LM | -0.034 |
Token | 1992 |
Token ID | 23847 |
Feature activation | -4.15e-02 |
Pos logit contributions | |
weet | +0.046 |
orate | +0.036 |
sex | +0.035 |
antly | +0.035 |
iculture | +0.035 |
Neg logit contributions | |
oot | -0.039 |
MFT | -0.037 |
タ | -0.035 |
\": | -0.034 |
LM | -0.033 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.82e-01 |
Pos logit contributions | |
weet | +0.029 |
orate | +0.021 |
sex | +0.021 |
antly | +0.021 |
iculture | +0.021 |
Neg logit contributions | |
oot | -0.032 |
MFT | -0.030 |
タ | -0.029 |
\": | -0.028 |
heim | -0.027 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
weet | +0.333 |
orate | +0.246 |
sex | +0.242 |
antly | +0.238 |
iculture | +0.238 |
Neg logit contributions | |
oot | -0.368 |
MFT | -0.352 |
タ | -0.333 |
\": | -0.330 |
LM | -0.318 |
Token | Gram |
Token ID | 20159 |
Feature activation | +2.09e-02 |
Pos logit contributions | |
weet | +0.105 |
orate | +0.078 |
sex | +0.077 |
antly | +0.076 |
iculture | +0.075 |
Neg logit contributions | |
oot | -0.111 |
MFT | -0.107 |
タ | -0.101 |
\": | -0.100 |
LM | -0.096 |
Token | mer |
Token ID | 647 |
Feature activation | +8.88e-02 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.013 |
タ | +0.012 |
\": | +0.012 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.013 |
antly | -0.012 |
iculture | -0.012 |
Token |
|
Token ID | 198 |
Feature activation | -4.21e-02 |
Pos logit contributions | |
oot | +0.061 |
MFT | +0.058 |
タ | +0.055 |
\": | +0.054 |
LM | +0.052 |
Neg logit contributions | |
weet | -0.068 |
orate | -0.052 |
sex | -0.051 |
iculture | -0.051 |
antly | -0.051 |
Token |
|
Token ID | 198 |
Feature activation | -4.83e-02 |
Pos logit contributions | |
weet | +0.029 |
orate | +0.021 |
sex | +0.021 |
antly | +0.021 |
iculture | +0.021 |
Neg logit contributions | |
oot | -0.032 |
MFT | -0.031 |
タ | -0.029 |
\": | -0.028 |
LM | -0.028 |
Token | Lith |
Token ID | 19730 |
Feature activation | -9.83e-02 |
Pos logit contributions | |
weet | +0.023 |
orate | +0.017 |
sex | +0.017 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.028 |
MFT | -0.027 |
タ | -0.025 |
\": | -0.025 |
LM | -0.024 |
Token | gow |
Token ID | 21175 |
Feature activation | +7.86e-02 |
Pos logit contributions | |
weet | +0.076 |
orate | +0.059 |
sex | +0.058 |
antly | +0.057 |
iculture | +0.057 |
Neg logit contributions | |
oot | -0.067 |
MFT | -0.063 |
タ | -0.059 |
\": | -0.059 |
LM | -0.056 |
Token |
|
Token ID | 198 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
oot | +0.057 |
MFT | +0.054 |
タ | +0.051 |
\": | +0.051 |
LM | +0.049 |
Neg logit contributions | |
weet | -0.057 |
orate | -0.043 |
sex | -0.042 |
iculture | -0.042 |
antly | -0.041 |
Token |
|
Token ID | 198 |
Feature activation | +2.16e-03 |
Pos logit contributions | |
oot | +0.009 |
MFT | +0.008 |
タ | +0.008 |
\": | +0.008 |
LM | +0.008 |
Neg logit contributions | |
weet | -0.008 |
orate | -0.006 |
sex | -0.006 |
antly | -0.006 |
iculture | -0.006 |
Token | 1996 |
Token ID | 22288 |
Feature activation | -1.30e-02 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.001 |
sex | -0.001 |
antly | -0.001 |
iculture | -0.001 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.80e-01 |
Pos logit contributions | |
weet | +0.009 |
orate | +0.006 |
sex | +0.006 |
antly | +0.006 |
iculture | +0.006 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.010 |
タ | -0.009 |
\": | -0.009 |
LM | -0.009 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.04e-01 |
Pos logit contributions | |
weet | +0.311 |
orate | +0.223 |
sex | +0.223 |
antly | +0.216 |
iculture | +0.216 |
Neg logit contributions | |
oot | -0.386 |
MFT | -0.372 |
タ | -0.354 |
\": | -0.349 |
LM | -0.337 |
Token | Gram |
Token ID | 20159 |
Feature activation | -9.67e-04 |
Pos logit contributions | |
weet | +0.073 |
orate | +0.054 |
sex | +0.054 |
antly | +0.053 |
iculture | +0.053 |
Neg logit contributions | |
oot | -0.078 |
MFT | -0.075 |
タ | -0.071 |
\": | -0.070 |
LM | -0.067 |
Token | mer |
Token ID | 647 |
Feature activation | +1.35e-01 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +3.39e-03 |
Pos logit contributions | |
oot | +0.098 |
MFT | +0.093 |
タ | +0.088 |
\": | +0.087 |
LM | +0.084 |
Neg logit contributions | |
weet | -0.098 |
orate | -0.074 |
sex | -0.072 |
iculture | -0.072 |
antly | -0.071 |
Token |
|
Token ID | 198 |
Feature activation | -3.92e-03 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.002 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token | 1994 |
Token ID | 22666 |
Feature activation | -1.33e-02 |
Pos logit contributions | |
weet | +0.002 |
orate | +0.001 |
sex | +0.001 |
iculture | +0.001 |
antly | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token | Jerry |
Token ID | 13075 |
Feature activation | +1.36e-01 |
Pos logit contributions | |
weet | +0.009 |
orate | +0.007 |
sex | +0.007 |
antly | +0.007 |
iculture | +0.007 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.010 |
タ | -0.009 |
\": | -0.009 |
LM | -0.009 |
Token | Se |
Token ID | 1001 |
Feature activation | +5.95e-02 |
Pos logit contributions | |
oot | +0.087 |
MFT | +0.082 |
タ | +0.077 |
\": | +0.076 |
LM | +0.073 |
Neg logit contributions | |
weet | -0.110 |
orate | -0.086 |
sex | -0.084 |
antly | -0.083 |
iculture | -0.083 |
Token | infeld |
Token ID | 47187 |
Feature activation | +1.92e-01 |
Pos logit contributions | |
oot | +0.028 |
MFT | +0.025 |
タ | +0.023 |
\": | +0.023 |
LM | +0.021 |
Neg logit contributions | |
weet | -0.059 |
orate | -0.048 |
sex | -0.048 |
antly | -0.047 |
iculture | -0.047 |
Token | 1995 |
Token ID | 8735 |
Feature activation | -1.96e-02 |
Pos logit contributions | |
oot | +0.136 |
MFT | +0.129 |
タ | +0.121 |
\": | +0.120 |
LM | +0.116 |
Neg logit contributions | |
weet | -0.143 |
orate | -0.109 |
sex | -0.107 |
iculture | -0.106 |
antly | -0.105 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.71e-01 |
Pos logit contributions | |
weet | +0.013 |
orate | +0.009 |
sex | +0.009 |
antly | +0.009 |
iculture | +0.009 |
Neg logit contributions | |
oot | -0.016 |
MFT | -0.015 |
タ | -0.014 |
\": | -0.014 |
LM | -0.014 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.07e-01 |
Pos logit contributions | |
weet | +0.306 |
orate | +0.221 |
sex | +0.219 |
antly | +0.214 |
iculture | +0.213 |
Neg logit contributions | |
oot | -0.378 |
MFT | -0.363 |
タ | -0.345 |
\": | -0.341 |
LM | -0.329 |
Token | Gram |
Token ID | 20159 |
Feature activation | +1.11e-02 |
Pos logit contributions | |
weet | +0.075 |
orate | +0.056 |
sex | +0.055 |
antly | +0.054 |
iculture | +0.054 |
Neg logit contributions | |
oot | -0.081 |
MFT | -0.077 |
タ | -0.073 |
\": | -0.072 |
LM | -0.070 |
Token | mer |
Token ID | 647 |
Feature activation | +1.30e-01 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
タ | +0.006 |
\": | +0.006 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.009 |
orate | -0.007 |
sex | -0.007 |
antly | -0.007 |
iculture | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | -7.90e-03 |
Pos logit contributions | |
oot | +0.096 |
MFT | +0.091 |
タ | +0.086 |
\": | +0.085 |
LM | +0.082 |
Neg logit contributions | |
weet | -0.094 |
orate | -0.071 |
sex | -0.069 |
iculture | -0.069 |
antly | -0.068 |
Token |
|
Token ID | 198 |
Feature activation | -1.68e-02 |
Pos logit contributions | |
weet | +0.006 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.006 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | Garry |
Token ID | 45354 |
Feature activation | +1.19e-01 |
Pos logit contributions | |
weet | +0.014 |
orate | +0.010 |
sex | +0.010 |
antly | +0.010 |
iculture | +0.010 |
Neg logit contributions | |
oot | -0.015 |
MFT | -0.015 |
タ | -0.014 |
\": | -0.014 |
heim | -0.013 |
Token | Sh |
Token ID | 911 |
Feature activation | +4.39e-02 |
Pos logit contributions | |
oot | +0.090 |
MFT | +0.086 |
タ | +0.081 |
\": | +0.080 |
�� | +0.077 |
Neg logit contributions | |
weet | -0.083 |
sex | -0.061 |
orate | -0.061 |
Rak | -0.060 |
antly | -0.060 |
Token | and |
Token ID | 392 |
Feature activation | -1.05e-01 |
Pos logit contributions | |
oot | +0.034 |
MFT | +0.032 |
タ | +0.030 |
\": | +0.030 |
heim | +0.029 |
Neg logit contributions | |
weet | -0.030 |
orate | -0.022 |
sex | -0.022 |
redistributed | -0.022 |
iculture | -0.022 |
Token | ling |
Token ID | 1359 |
Feature activation | -1.26e-01 |
Pos logit contributions | |
weet | +0.073 |
orate | +0.054 |
sex | +0.053 |
antly | +0.053 |
redistributed | +0.053 |
Neg logit contributions | |
oot | -0.080 |
MFT | -0.076 |
タ | -0.072 |
\": | -0.071 |
heim | -0.069 |
Token | 1994 |
Token ID | 9162 |
Feature activation | +1.44e-02 |
Pos logit contributions | |
weet | +0.095 |
orate | +0.072 |
sex | +0.071 |
antly | +0.070 |
iculture | +0.070 |
Neg logit contributions | |
oot | -0.089 |
MFT | -0.084 |
タ | -0.079 |
\": | -0.078 |
LM | -0.075 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.68e-01 |
Pos logit contributions | |
oot | +0.011 |
MFT | +0.011 |
タ | +0.010 |
\": | +0.010 |
heim | +0.010 |
Neg logit contributions | |
weet | -0.010 |
orate | -0.007 |
sex | -0.007 |
antly | -0.007 |
iculture | -0.007 |
Token | sey |
Token ID | 4397 |
Feature activation | -1.01e-01 |
Pos logit contributions | |
weet | +0.306 |
orate | +0.222 |
sex | +0.219 |
antly | +0.214 |
iculture | +0.214 |
Neg logit contributions | |
oot | -0.374 |
MFT | -0.359 |
タ | -0.341 |
\": | -0.338 |
LM | -0.326 |
Token | Gram |
Token ID | 20159 |
Feature activation | +1.41e-02 |
Pos logit contributions | |
weet | +0.071 |
orate | +0.052 |
sex | +0.052 |
antly | +0.051 |
iculture | +0.051 |
Neg logit contributions | |
oot | -0.077 |
MFT | -0.073 |
タ | -0.069 |
\": | -0.068 |
LM | -0.066 |
Token | mer |
Token ID | 647 |
Feature activation | +1.11e-01 |
Pos logit contributions | |
oot | +0.009 |
MFT | +0.009 |
タ | +0.008 |
\": | +0.008 |
LM | +0.008 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.009 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.008 |
Token |
|
Token ID | 198 |
Feature activation | -6.93e-03 |
Pos logit contributions | |
oot | +0.081 |
MFT | +0.077 |
タ | +0.073 |
\": | +0.072 |
LM | +0.070 |
Neg logit contributions | |
weet | -0.080 |
orate | -0.060 |
sex | -0.059 |
iculture | -0.058 |
antly | -0.058 |
Token |
|
Token ID | 198 |
Feature activation | -1.26e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
antly | +0.003 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.005 |
MFT | -0.005 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | -8.50e-03 |
Pos logit contributions | |
weet | +0.019 |
orate | +0.014 |
sex | +0.014 |
iculture | +0.014 |
redistributed | +0.014 |
Neg logit contributions | |
oot | -0.018 |
MFT | -0.017 |
タ | -0.016 |
\": | -0.016 |
LM | -0.015 |
Token | Fox |
Token ID | 5426 |
Feature activation | -5.40e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.003 |
iculture | +0.003 |
sex | +0.003 |
antly | +0.003 |
Neg logit contributions | |
oot | -0.008 |
MFT | -0.008 |
タ | -0.007 |
\": | -0.007 |
LM | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | +4.33e-02 |
Pos logit contributions | |
weet | +0.039 |
orate | +0.030 |
sex | +0.029 |
iculture | +0.029 |
antly | +0.029 |
Neg logit contributions | |
oot | -0.039 |
MFT | -0.037 |
タ | -0.035 |
\": | -0.035 |
LM | -0.034 |
Token |
|
Token ID | 198 |
Feature activation | +3.05e-02 |
Pos logit contributions | |
oot | +0.033 |
MFT | +0.031 |
タ | +0.030 |
\": | +0.029 |
LM | +0.028 |
Neg logit contributions | |
weet | -0.030 |
orate | -0.022 |
sex | -0.022 |
antly | -0.022 |
iculture | -0.021 |
Token | 1997 |
Token ID | 21498 |
Feature activation | -1.92e-02 |
Pos logit contributions | |
oot | +0.019 |
MFT | +0.018 |
タ | +0.017 |
\": | +0.017 |
LM | +0.016 |
Neg logit contributions | |
weet | -0.025 |
orate | -0.020 |
sex | -0.019 |
antly | -0.019 |
iculture | -0.019 |
Token | Kel |
Token ID | 15150 |
Feature activation | -4.68e-01 |
Pos logit contributions | |
weet | +0.013 |
orate | +0.010 |
sex | +0.009 |
antly | +0.009 |
iculture | +0.009 |
Neg logit contributions | |
oot | -0.015 |
MFT | -0.014 |
タ | -0.013 |
\": | -0.013 |
LM | -0.013 |
Token | sey |
Token ID | 4397 |
Feature activation | -8.77e-02 |
Pos logit contributions | |
weet | +0.302 |
orate | +0.217 |
sex | +0.217 |
antly | +0.210 |
iculture | +0.210 |
Neg logit contributions | |
oot | -0.378 |
MFT | -0.364 |
タ | -0.346 |
\": | -0.342 |
LM | -0.330 |
Token | Gram |
Token ID | 20159 |
Feature activation | +6.92e-03 |
Pos logit contributions | |
weet | +0.061 |
orate | +0.046 |
sex | +0.045 |
antly | +0.044 |
iculture | +0.044 |
Neg logit contributions | |
oot | -0.066 |
MFT | -0.063 |
タ | -0.060 |
\": | -0.059 |
LM | -0.057 |
Token | mer |
Token ID | 647 |
Feature activation | +1.40e-01 |
Pos logit contributions | |
oot | +0.004 |
MFT | +0.004 |
タ | +0.004 |
\": | +0.004 |
LM | +0.004 |
Neg logit contributions | |
weet | -0.006 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
iculture | -0.004 |
Token |
|
Token ID | 198 |
Feature activation | +1.99e-02 |
Pos logit contributions | |
oot | +0.102 |
MFT | +0.097 |
タ | +0.092 |
\": | +0.091 |
LM | +0.088 |
Neg logit contributions | |
weet | -0.101 |
orate | -0.076 |
sex | -0.074 |
iculture | -0.074 |
antly | -0.074 |
Token |
|
Token ID | 198 |
Feature activation | +3.40e-03 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.014 |
タ | +0.014 |
\": | +0.013 |
LM | +0.013 |
Neg logit contributions | |
weet | -0.014 |
orate | -0.010 |
sex | -0.010 |
antly | -0.010 |
iculture | -0.010 |
Token | B |
Token ID | 33 |
Feature activation | -1.13e-01 |
Pos logit contributions | |
oot | +0.044 |
MFT | +0.042 |
タ | +0.039 |
\": | +0.039 |
LM | +0.037 |
Neg logit contributions | |
weet | -0.044 |
orate | -0.033 |
sex | -0.033 |
iculture | -0.032 |
antly | -0.032 |
Token | old |
Token ID | 727 |
Feature activation | +4.55e-02 |
Pos logit contributions | |
weet | +0.089 |
orate | +0.069 |
sex | +0.068 |
iculture | +0.067 |
antly | +0.067 |
Neg logit contributions | |
oot | -0.075 |
MFT | -0.071 |
タ | -0.067 |
\": | -0.066 |
LM | -0.063 |
Token | ) |
Token ID | 8 |
Feature activation | -5.16e-02 |
Pos logit contributions | |
oot | +0.031 |
MFT | +0.030 |
タ | +0.028 |
\": | +0.027 |
LM | +0.026 |
Neg logit contributions | |
weet | -0.035 |
orate | -0.027 |
sex | -0.026 |
antly | -0.026 |
iculture | -0.026 |
Token | $ |
Token ID | 720 |
Feature activation | +6.11e-02 |
Pos logit contributions | |
weet | +0.040 |
orate | +0.031 |
sex | +0.030 |
antly | +0.030 |
iculture | +0.030 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.033 |
タ | -0.031 |
\": | -0.031 |
LM | -0.030 |
Token | Br |
Token ID | 9414 |
Feature activation | +3.68e-02 |
Pos logit contributions | |
oot | +0.042 |
MFT | +0.040 |
タ | +0.038 |
\": | +0.037 |
LM | +0.036 |
Neg logit contributions | |
weet | -0.047 |
orate | -0.036 |
sex | -0.035 |
iculture | -0.035 |
antly | -0.035 |
Token | ush |
Token ID | 1530 |
Feature activation | +2.62e-01 |
Pos logit contributions | |
oot | +0.022 |
MFT | +0.021 |
タ | +0.020 |
\": | +0.019 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.024 |
sex | -0.024 |
antly | -0.024 |
iculture | -0.024 |
Token | 1 |
Token ID | 16 |
Feature activation | -2.78e-02 |
Pos logit contributions | |
oot | +0.187 |
MFT | +0.178 |
タ | +0.168 |
\": | +0.166 |
LM | +0.160 |
Neg logit contributions | |
weet | -0.194 |
orate | -0.147 |
sex | -0.145 |
antly | -0.143 |
iculture | -0.143 |
Token | = |
Token ID | 796 |
Feature activation | -3.44e-02 |
Pos logit contributions | |
weet | +0.020 |
orate | +0.015 |
sex | +0.015 |
antly | +0.015 |
iculture | +0.015 |
Neg logit contributions | |
oot | -0.020 |
MFT | -0.019 |
タ | -0.018 |
\": | -0.018 |
LM | -0.017 |
Token | New |
Token ID | 968 |
Feature activation | +1.97e-02 |
Pos logit contributions | |
weet | +0.025 |
orate | +0.019 |
sex | +0.019 |
antly | +0.019 |
redistributed | +0.019 |
Neg logit contributions | |
oot | -0.025 |
MFT | -0.023 |
タ | -0.022 |
\": | -0.022 |
LM | -0.021 |
Token | - |
Token ID | 12 |
Feature activation | -3.85e-03 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.013 |
タ | +0.013 |
\": | +0.012 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.011 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | Object |
Token ID | 10267 |
Feature activation | +1.11e-01 |
Pos logit contributions | |
weet | +0.004 |
sex | +0.003 |
orate | +0.003 |
iculture | +0.003 |
antly | +0.003 |
Neg logit contributions | |
oot | -0.002 |
MFT | -0.002 |
タ | -0.001 |
\": | -0.001 |
LM | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | -1.01e-03 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
タ | +0.011 |
\": | +0.011 |
heim | +0.010 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.008 |
sex | -0.008 |
redistributed | -0.008 |
antly | -0.008 |
Token | public |
Token ID | 1171 |
Feature activation | -1.18e-01 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
redistributed | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
heim | -0.001 |
Token | radio |
Token ID | 5243 |
Feature activation | +1.73e-02 |
Pos logit contributions | |
weet | +0.081 |
orate | +0.060 |
sex | +0.059 |
antly | +0.058 |
iculture | +0.058 |
Neg logit contributions | |
oot | -0.090 |
MFT | -0.086 |
タ | -0.082 |
\": | -0.081 |
LM | -0.078 |
Token | station |
Token ID | 4429 |
Feature activation | -1.87e-03 |
Pos logit contributions | |
oot | +0.010 |
MFT | +0.009 |
タ | +0.008 |
\": | +0.008 |
heim | +0.008 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.012 |
sex | -0.012 |
antly | -0.012 |
redistributed | -0.012 |
Token | KP |
Token ID | 45814 |
Feature activation | +1.57e-01 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
iculture | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
heim | -0.001 |
Token | CC |
Token ID | 4093 |
Feature activation | +2.56e-01 |
Pos logit contributions | |
oot | +0.060 |
MFT | +0.055 |
タ | +0.049 |
\": | +0.048 |
LM | +0.044 |
Neg logit contributions | |
weet | -0.168 |
orate | -0.140 |
sex | -0.138 |
antly | -0.137 |
iculture | -0.137 |
Token | on |
Token ID | 319 |
Feature activation | +2.35e-02 |
Pos logit contributions | |
oot | +0.178 |
MFT | +0.169 |
タ | +0.160 |
\": | +0.159 |
heim | +0.150 |
Neg logit contributions | |
weet | -0.193 |
orate | -0.147 |
sex | -0.145 |
iculture | -0.143 |
redistributed | -0.143 |
Token | September |
Token ID | 2693 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
oot | +0.015 |
MFT | +0.014 |
タ | +0.014 |
\": | +0.013 |
heim | +0.013 |
Neg logit contributions | |
weet | -0.019 |
orate | -0.015 |
sex | -0.014 |
antly | -0.014 |
iculture | -0.014 |
Token | 29 |
Token ID | 2808 |
Feature activation | +6.01e-02 |
Pos logit contributions | |
oot | +0.078 |
MFT | +0.074 |
タ | +0.070 |
\": | +0.069 |
LM | +0.066 |
Neg logit contributions | |
weet | -0.081 |
orate | -0.062 |
sex | -0.061 |
antly | -0.060 |
iculture | -0.060 |
Token | , |
Token ID | 11 |
Feature activation | +2.33e-02 |
Pos logit contributions | |
oot | +0.045 |
MFT | +0.043 |
タ | +0.040 |
\": | +0.040 |
LM | +0.038 |
Neg logit contributions | |
weet | -0.043 |
orate | -0.032 |
sex | -0.031 |
antly | -0.031 |
iculture | -0.031 |
Token | 2010 |
Token ID | 3050 |
Feature activation | -1.50e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.015 |
Neg logit contributions | |
weet | -0.017 |
orate | -0.013 |
sex | -0.012 |
antly | -0.012 |
iculture | -0.012 |
Token | maj |
Token ID | 16486 |
Feature activation | -3.22e-01 |
Pos logit contributions | |
oot | +0.038 |
MFT | +0.036 |
タ | +0.034 |
\": | +0.034 |
LM | +0.033 |
Neg logit contributions | |
weet | -0.036 |
orate | -0.027 |
sex | -0.026 |
antly | -0.026 |
iculture | -0.026 |
Token | est |
Token ID | 395 |
Feature activation | -7.89e-02 |
Pos logit contributions | |
weet | +0.237 |
orate | +0.179 |
sex | +0.176 |
antly | +0.173 |
iculture | +0.173 |
Neg logit contributions | |
oot | -0.232 |
MFT | -0.221 |
タ | -0.208 |
\": | -0.206 |
LM | -0.198 |
Token | ical |
Token ID | 605 |
Feature activation | -7.29e-02 |
Pos logit contributions | |
weet | +0.065 |
orate | +0.051 |
sex | +0.050 |
iculture | +0.050 |
antly | +0.050 |
Neg logit contributions | |
oot | -0.050 |
MFT | -0.047 |
タ | -0.043 |
\": | -0.043 |
LM | -0.041 |
Token | roof |
Token ID | 9753 |
Feature activation | -1.25e-01 |
Pos logit contributions | |
weet | +0.053 |
orate | +0.040 |
sex | +0.039 |
antly | +0.039 |
iculture | +0.039 |
Neg logit contributions | |
oot | -0.053 |
MFT | -0.050 |
タ | -0.047 |
\": | -0.047 |
LM | -0.045 |
Token | fre |
Token ID | 2030 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
weet | +0.090 |
orate | +0.067 |
sex | +0.066 |
antly | +0.065 |
iculture | +0.065 |
Neg logit contributions | |
oot | -0.091 |
MFT | -0.087 |
タ | -0.082 |
\": | -0.081 |
LM | -0.078 |
Token | tted |
Token ID | 28734 |
Feature activation | +3.23e-01 |
Pos logit contributions | |
oot | +0.061 |
MFT | +0.060 |
タ | +0.055 |
\": | +0.054 |
LM | +0.051 |
Neg logit contributions | |
weet | -0.091 |
orate | -0.071 |
sex | -0.071 |
antly | -0.070 |
iculture | -0.070 |
Token | with |
Token ID | 351 |
Feature activation | +3.07e-02 |
Pos logit contributions | |
oot | +0.237 |
MFT | +0.227 |
タ | +0.214 |
\": | +0.212 |
LM | +0.204 |
Neg logit contributions | |
weet | -0.232 |
orate | -0.174 |
sex | -0.171 |
antly | -0.169 |
iculture | -0.168 |
Token | golden |
Token ID | 10861 |
Feature activation | +2.23e-02 |
Pos logit contributions | |
oot | +0.023 |
MFT | +0.022 |
タ | +0.020 |
\": | +0.020 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Token | fire |
Token ID | 2046 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.016 |
orate | -0.012 |
sex | -0.012 |
antly | -0.011 |
iculture | -0.011 |
Token | , |
Token ID | 11 |
Feature activation | +3.40e-02 |
Pos logit contributions | |
weet | +0.106 |
orate | +0.080 |
sex | +0.078 |
antly | +0.077 |
iculture | +0.077 |
Neg logit contributions | |
oot | -0.105 |
MFT | -0.100 |
タ | -0.095 |
\": | -0.094 |
LM | -0.090 |
Token |
|
Token ID | 198 |
Feature activation | +4.41e-02 |
Pos logit contributions | |
oot | +0.025 |
MFT | +0.024 |
タ | +0.023 |
\": | +0.022 |
LM | +0.022 |
Neg logit contributions | |
weet | -0.024 |
orate | -0.018 |
sex | -0.018 |
antly | -0.018 |
iculture | -0.018 |
Token | buff |
Token ID | 36873 |
Feature activation | +1.61e-01 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.007 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.012 |
sex | -0.012 |
antly | -0.012 |
iculture | -0.012 |
Token | ers |
Token ID | 364 |
Feature activation | -8.09e-02 |
Pos logit contributions | |
oot | +0.136 |
MFT | +0.130 |
タ | +0.124 |
\": | +0.123 |
LM | +0.119 |
Neg logit contributions | |
weet | -0.098 |
orate | -0.069 |
sex | -0.068 |
iculture | -0.067 |
antly | -0.067 |
Token | and |
Token ID | 290 |
Feature activation | +5.79e-03 |
Pos logit contributions | |
weet | +0.058 |
orate | +0.044 |
sex | +0.043 |
antly | +0.043 |
iculture | +0.042 |
Neg logit contributions | |
oot | -0.059 |
MFT | -0.057 |
タ | -0.053 |
\": | -0.053 |
LM | -0.051 |
Token | prot |
Token ID | 1237 |
Feature activation | -1.27e-01 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.003 |
LM | +0.003 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
iculture | -0.004 |
Token | ob |
Token ID | 672 |
Feature activation | -4.55e-02 |
Pos logit contributions | |
weet | +0.097 |
orate | +0.074 |
sex | +0.073 |
antly | +0.072 |
iculture | +0.072 |
Neg logit contributions | |
oot | -0.088 |
MFT | -0.083 |
タ | -0.078 |
\": | -0.077 |
LM | -0.074 |
Token | uf |
Token ID | 3046 |
Feature activation | +3.44e-01 |
Pos logit contributions | |
weet | +0.052 |
orate | +0.044 |
sex | +0.044 |
antly | +0.043 |
iculture | +0.043 |
Neg logit contributions | |
oot | -0.014 |
MFT | -0.013 |
タ | -0.011 |
\": | -0.010 |
LM | -0.009 |
Token | should |
Token ID | 815 |
Feature activation | +3.48e-03 |
Pos logit contributions | |
oot | +0.237 |
MFT | +0.226 |
タ | +0.212 |
\": | +0.210 |
LM | +0.201 |
Neg logit contributions | |
weet | -0.263 |
orate | -0.200 |
sex | -0.198 |
antly | -0.195 |
iculture | -0.195 |
Token | have |
Token ID | 423 |
Feature activation | +5.28e-02 |
Pos logit contributions | |
oot | +0.002 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token | an |
Token ID | 281 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
oot | +0.040 |
MFT | +0.038 |
タ | +0.036 |
\": | +0.036 |
LM | +0.034 |
Neg logit contributions | |
weet | -0.037 |
orate | -0.027 |
sex | -0.027 |
iculture | -0.027 |
antly | -0.027 |
Token | easy |
Token ID | 2562 |
Feature activation | -6.22e-02 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.007 |
heim | +0.007 |
Neg logit contributions | |
weet | -0.009 |
orate | -0.007 |
sex | -0.007 |
antly | -0.007 |
redistributed | -0.007 |
Token | time |
Token ID | 640 |
Feature activation | +6.06e-02 |
Pos logit contributions | |
weet | +0.045 |
orate | +0.034 |
sex | +0.034 |
antly | +0.033 |
iculture | +0.033 |
Neg logit contributions | |
oot | -0.045 |
MFT | -0.043 |
タ | -0.040 |
\": | -0.040 |
LM | -0.039 |
Token | but |
Token ID | 475 |
Feature activation | +3.82e-02 |
Pos logit contributions | |
weet | +0.064 |
orate | +0.048 |
sex | +0.048 |
antly | +0.047 |
redistributed | +0.047 |
Neg logit contributions | |
oot | -0.062 |
MFT | -0.059 |
タ | -0.055 |
\": | -0.055 |
LM | -0.053 |
Token | I |
Token ID | 314 |
Feature activation | +7.92e-02 |
Pos logit contributions | |
oot | +0.027 |
MFT | +0.026 |
タ | +0.024 |
\": | +0.024 |
LM | +0.023 |
Neg logit contributions | |
weet | -0.028 |
orate | -0.021 |
sex | -0.021 |
antly | -0.021 |
iculture | -0.021 |
Token | did |
Token ID | 750 |
Feature activation | -7.55e-02 |
Pos logit contributions | |
oot | +0.057 |
MFT | +0.054 |
タ | +0.051 |
\": | +0.050 |
LM | +0.048 |
Neg logit contributions | |
weet | -0.058 |
orate | -0.044 |
sex | -0.043 |
antly | -0.043 |
iculture | -0.043 |
Token | manage |
Token ID | 6687 |
Feature activation | -5.33e-02 |
Pos logit contributions | |
weet | +0.056 |
orate | +0.043 |
sex | +0.042 |
antly | +0.042 |
iculture | +0.041 |
Neg logit contributions | |
oot | -0.053 |
MFT | -0.051 |
タ | -0.048 |
\": | -0.047 |
heim | -0.045 |
Token | to |
Token ID | 284 |
Feature activation | -8.91e-03 |
Pos logit contributions | |
weet | +0.035 |
orate | +0.025 |
sex | +0.025 |
antly | +0.024 |
iculture | +0.024 |
Neg logit contributions | |
oot | -0.043 |
MFT | -0.041 |
タ | -0.039 |
\": | -0.039 |
LM | -0.037 |
Token | answer |
Token ID | 3280 |
Feature activation | +3.31e-01 |
Pos logit contributions | |
weet | +0.007 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.006 |
タ | -0.005 |
\": | -0.005 |
heim | -0.005 |
Token | a |
Token ID | 257 |
Feature activation | +5.34e-02 |
Pos logit contributions | |
oot | +0.250 |
MFT | +0.238 |
タ | +0.225 |
\": | +0.223 |
LM | +0.216 |
Neg logit contributions | |
weet | -0.231 |
orate | -0.171 |
sex | -0.169 |
iculture | -0.167 |
antly | -0.166 |
Token | few |
Token ID | 1178 |
Feature activation | +2.21e-02 |
Pos logit contributions | |
oot | +0.037 |
MFT | +0.035 |
タ | +0.033 |
\": | +0.033 |
LM | +0.031 |
Neg logit contributions | |
weet | -0.041 |
orate | -0.031 |
sex | -0.030 |
iculture | -0.030 |
antly | -0.030 |
Token | questions |
Token ID | 2683 |
Feature activation | -1.33e-02 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.015 |
\": | +0.015 |
LM | +0.014 |
Neg logit contributions | |
weet | -0.016 |
orate | -0.012 |
sex | -0.011 |
antly | -0.011 |
iculture | -0.011 |
Token | in |
Token ID | 287 |
Feature activation | +3.61e-02 |
Pos logit contributions | |
weet | +0.010 |
orate | +0.007 |
sex | +0.007 |
antly | +0.007 |
redistributed | +0.007 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.009 |
タ | -0.009 |
\": | -0.009 |
LM | -0.008 |
Token | the |
Token ID | 262 |
Feature activation | +7.38e-02 |
Pos logit contributions | |
oot | +0.025 |
MFT | +0.024 |
タ | +0.023 |
\": | +0.023 |
LM | +0.022 |
Neg logit contributions | |
weet | -0.027 |
orate | -0.021 |
sex | -0.020 |
antly | -0.020 |
iculture | -0.020 |
Token | seeds |
Token ID | 11904 |
Feature activation | +2.91e-02 |
Pos logit contributions | |
weet | +0.080 |
orate | +0.059 |
sex | +0.058 |
antly | +0.057 |
iculture | +0.057 |
Neg logit contributions | |
oot | -0.093 |
MFT | -0.089 |
タ | -0.084 |
\": | -0.084 |
LM | -0.081 |
Token | but |
Token ID | 475 |
Feature activation | +5.03e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.021 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.015 |
Token | these |
Token ID | 777 |
Feature activation | +2.96e-02 |
Pos logit contributions | |
oot | +0.035 |
MFT | +0.034 |
タ | +0.032 |
\": | +0.032 |
heim | +0.030 |
Neg logit contributions | |
weet | -0.038 |
orate | -0.029 |
sex | -0.028 |
antly | -0.028 |
redistributed | -0.028 |
Token | are |
Token ID | 389 |
Feature activation | +1.13e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.018 |
LM | +0.018 |
Neg logit contributions | |
weet | -0.022 |
orate | -0.017 |
sex | -0.017 |
antly | -0.016 |
iculture | -0.016 |
Token | a |
Token ID | 257 |
Feature activation | +1.79e-02 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.008 |
タ | +0.007 |
\": | +0.007 |
LM | +0.007 |
Neg logit contributions | |
weet | -0.008 |
orate | -0.006 |
sex | -0.006 |
antly | -0.006 |
redistributed | -0.006 |
Token | pseud |
Token ID | 25038 |
Feature activation | +5.00e-02 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.012 |
タ | +0.011 |
\": | +0.011 |
LM | +0.011 |
Neg logit contributions | |
weet | -0.014 |
orate | -0.010 |
sex | -0.010 |
antly | -0.010 |
redistributed | -0.010 |
Token | og |
Token ID | 519 |
Feature activation | +9.56e-02 |
Pos logit contributions | |
oot | +0.033 |
MFT | +0.031 |
タ | +0.029 |
\": | +0.029 |
LM | +0.028 |
Neg logit contributions | |
weet | -0.040 |
orate | -0.031 |
sex | -0.031 |
antly | -0.030 |
iculture | -0.030 |
Token | rain |
Token ID | 3201 |
Feature activation | -8.85e-02 |
Pos logit contributions | |
oot | +0.086 |
MFT | +0.083 |
タ | +0.080 |
\": | +0.079 |
LM | +0.077 |
Neg logit contributions | |
weet | -0.053 |
orate | -0.035 |
sex | -0.034 |
antly | -0.034 |
iculture | -0.034 |
Token | , |
Token ID | 11 |
Feature activation | +1.29e-03 |
Pos logit contributions | |
weet | +0.066 |
orate | +0.049 |
sex | +0.048 |
antly | +0.048 |
iculture | +0.048 |
Neg logit contributions | |
oot | -0.063 |
MFT | -0.061 |
\": | -0.057 |
タ | -0.057 |
LM | -0.054 |
Token | and |
Token ID | 290 |
Feature activation | +3.05e-02 |
Pos logit contributions | |
oot | +0.001 |
MFT | +0.001 |
タ | +0.001 |
\": | +0.001 |
LM | +0.001 |
Neg logit contributions | |
weet | -0.001 |
orate | -0.001 |
sex | -0.001 |
antly | -0.001 |
iculture | -0.001 |
Token | Prof |
Token ID | 4415 |
Feature activation | +2.12e-02 |
Pos logit contributions | |
oot | +0.021 |
MFT | +0.020 |
タ | +0.019 |
\": | +0.019 |
heim | +0.018 |
Neg logit contributions | |
weet | -0.023 |
orate | -0.017 |
sex | -0.017 |
antly | -0.017 |
redistributed | -0.017 |
Token | front |
Token ID | 2166 |
Feature activation | +1.15e-01 |
Pos logit contributions | |
weet | +0.072 |
orate | +0.057 |
sex | +0.056 |
iculture | +0.055 |
antly | +0.055 |
Neg logit contributions | |
oot | -0.057 |
MFT | -0.055 |
タ | -0.051 |
\": | -0.051 |
LM | -0.048 |
Token | and |
Token ID | 290 |
Feature activation | -3.38e-02 |
Pos logit contributions | |
oot | +0.084 |
MFT | +0.081 |
タ | +0.076 |
\": | +0.075 |
LM | +0.072 |
Neg logit contributions | |
weet | -0.083 |
orate | -0.062 |
sex | -0.061 |
antly | -0.061 |
iculture | -0.061 |
Token | then |
Token ID | 788 |
Feature activation | -2.97e-02 |
Pos logit contributions | |
weet | +0.025 |
orate | +0.019 |
sex | +0.019 |
antly | +0.018 |
iculture | +0.018 |
Neg logit contributions | |
oot | -0.024 |
MFT | -0.023 |
タ | -0.022 |
\": | -0.022 |
LM | -0.021 |
Token | pay |
Token ID | 1414 |
Feature activation | +4.44e-02 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.017 |
sex | +0.017 |
antly | +0.016 |
iculture | +0.016 |
Neg logit contributions | |
oot | -0.021 |
MFT | -0.020 |
タ | -0.019 |
\": | -0.019 |
LM | -0.018 |
Token | the |
Token ID | 262 |
Feature activation | +1.18e-02 |
Pos logit contributions | |
oot | +0.032 |
MFT | +0.031 |
タ | +0.029 |
\": | +0.029 |
LM | +0.028 |
Neg logit contributions | |
weet | -0.032 |
orate | -0.024 |
sex | -0.024 |
iculture | -0.024 |
antly | -0.024 |
Token | rest |
Token ID | 1334 |
Feature activation | +1.64e-01 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.007 |
タ | +0.007 |
\": | +0.007 |
heim | +0.007 |
Neg logit contributions | |
weet | -0.009 |
orate | -0.007 |
sex | -0.007 |
antly | -0.007 |
redistributed | -0.007 |
Token | at |
Token ID | 379 |
Feature activation | -3.01e-02 |
Pos logit contributions | |
oot | +0.121 |
MFT | +0.115 |
タ | +0.108 |
\": | +0.107 |
LM | +0.103 |
Neg logit contributions | |
weet | -0.117 |
orate | -0.088 |
sex | -0.087 |
iculture | -0.085 |
antly | -0.085 |
Token | 1 |
Token ID | 352 |
Feature activation | -7.40e-02 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.016 |
sex | +0.016 |
antly | +0.016 |
redistributed | +0.016 |
Neg logit contributions | |
oot | -0.022 |
MFT | -0.021 |
タ | -0.020 |
\": | -0.020 |
LM | -0.019 |
Token | percent |
Token ID | 1411 |
Feature activation | +7.51e-02 |
Pos logit contributions | |
weet | +0.059 |
orate | +0.046 |
sex | +0.045 |
antly | +0.045 |
iculture | +0.044 |
Neg logit contributions | |
oot | -0.048 |
MFT | -0.046 |
タ | -0.043 |
\": | -0.042 |
LM | -0.041 |
Token | interest |
Token ID | 1393 |
Feature activation | +5.40e-02 |
Pos logit contributions | |
oot | +0.054 |
MFT | +0.052 |
タ | +0.048 |
\": | +0.048 |
LM | +0.046 |
Neg logit contributions | |
weet | -0.055 |
orate | -0.042 |
sex | -0.041 |
iculture | -0.040 |
antly | -0.040 |
Token | over |
Token ID | 625 |
Feature activation | -2.54e-02 |
Pos logit contributions | |
oot | +0.039 |
MFT | +0.037 |
タ | +0.035 |
\": | +0.035 |
LM | +0.033 |
Neg logit contributions | |
weet | -0.040 |
orate | -0.030 |
sex | -0.029 |
antly | -0.029 |
iculture | -0.029 |
Token | , |
Token ID | 11 |
Feature activation | +5.42e-03 |
Pos logit contributions | |
weet | +0.098 |
orate | +0.073 |
sex | +0.072 |
antly | +0.071 |
iculture | +0.071 |
Neg logit contributions | |
oot | -0.101 |
MFT | -0.096 |
タ | -0.091 |
\": | -0.090 |
LM | -0.086 |
Token | Boyle |
Token ID | 37619 |
Feature activation | +2.06e-01 |
Pos logit contributions | |
oot | +0.004 |
MFT | +0.004 |
タ | +0.004 |
\": | +0.003 |
LM | +0.003 |
Neg logit contributions | |
weet | -0.004 |
orate | -0.003 |
sex | -0.003 |
antly | -0.003 |
iculture | -0.003 |
Token | recently |
Token ID | 2904 |
Feature activation | +1.84e-01 |
Pos logit contributions | |
oot | +0.148 |
MFT | +0.142 |
タ | +0.134 |
\": | +0.132 |
LM | +0.127 |
Neg logit contributions | |
weet | -0.150 |
orate | -0.113 |
sex | -0.112 |
antly | -0.110 |
iculture | -0.110 |
Token | saw |
Token ID | 2497 |
Feature activation | -1.92e-01 |
Pos logit contributions | |
oot | +0.142 |
MFT | +0.136 |
タ | +0.128 |
\": | +0.127 |
LM | +0.123 |
Neg logit contributions | |
weet | -0.126 |
orate | -0.093 |
sex | -0.092 |
antly | -0.090 |
iculture | -0.090 |
Token | the |
Token ID | 262 |
Feature activation | +2.58e-02 |
Pos logit contributions | |
weet | +0.139 |
orate | +0.105 |
sex | +0.103 |
iculture | +0.102 |
antly | +0.102 |
Neg logit contributions | |
oot | -0.140 |
MFT | -0.134 |
タ | -0.125 |
\": | -0.125 |
LM | -0.120 |
Token | irony |
Token ID | 21296 |
Feature activation | +8.68e-02 |
Pos logit contributions | |
oot | +0.019 |
MFT | +0.018 |
タ | +0.017 |
\": | +0.016 |
heim | +0.016 |
Neg logit contributions | |
weet | -0.019 |
orate | -0.014 |
sex | -0.014 |
antly | -0.014 |
iculture | -0.014 |
Token | in |
Token ID | 287 |
Feature activation | +1.98e-02 |
Pos logit contributions | |
oot | +0.062 |
MFT | +0.059 |
タ | +0.055 |
\": | +0.055 |
LM | +0.052 |
Neg logit contributions | |
weet | -0.065 |
orate | -0.049 |
sex | -0.048 |
antly | -0.048 |
iculture | -0.048 |
Token | his |
Token ID | 465 |
Feature activation | -1.33e-02 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.013 |
タ | +0.013 |
\": | +0.013 |
heim | +0.012 |
Neg logit contributions | |
weet | -0.015 |
orate | -0.011 |
sex | -0.011 |
antly | -0.011 |
redistributed | -0.011 |
Token | statement |
Token ID | 2643 |
Feature activation | -3.64e-02 |
Pos logit contributions | |
weet | +0.010 |
orate | +0.007 |
sex | +0.007 |
antly | +0.007 |
iculture | +0.007 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.009 |
タ | -0.009 |
\": | -0.009 |
heim | -0.008 |
Token | that |
Token ID | 326 |
Feature activation | -1.91e-03 |
Pos logit contributions | |
weet | +0.027 |
orate | +0.021 |
sex | +0.020 |
antly | +0.020 |
iculture | +0.020 |
Neg logit contributions | |
oot | -0.026 |
MFT | -0.025 |
タ | -0.023 |
\": | -0.023 |
LM | -0.022 |
Token | day |
Token ID | 1110 |
Feature activation | +1.73e-02 |
Pos logit contributions | |
weet | +0.001 |
orate | +0.001 |
sex | +0.001 |
antly | +0.001 |
redistributed | +0.001 |
Neg logit contributions | |
oot | -0.001 |
MFT | -0.001 |
タ | -0.001 |
\": | -0.001 |
heim | -0.001 |
Token | law |
Token ID | 1099 |
Feature activation | +1.12e-01 |
Pos logit contributions | |
oot | +0.007 |
MFT | +0.007 |
タ | +0.006 |
\": | +0.006 |
LM | +0.006 |
Neg logit contributions | |
weet | -0.011 |
orate | -0.009 |
sex | -0.008 |
iculture | -0.008 |
antly | -0.008 |
Token | — |
Token ID | 960 |
Feature activation | +7.44e-02 |
Pos logit contributions | |
oot | +0.080 |
MFT | +0.076 |
タ | +0.071 |
\": | +0.071 |
LM | +0.068 |
Neg logit contributions | |
weet | -0.083 |
orate | -0.063 |
sex | -0.062 |
antly | -0.061 |
iculture | -0.061 |
Token | proof |
Token ID | 13288 |
Feature activation | +1.84e-01 |
Pos logit contributions | |
oot | +0.052 |
MFT | +0.049 |
タ | +0.046 |
\": | +0.046 |
LM | +0.044 |
Neg logit contributions | |
weet | -0.056 |
orate | -0.043 |
sex | -0.042 |
antly | -0.042 |
iculture | -0.042 |
Token | beyond |
Token ID | 3675 |
Feature activation | +4.44e-02 |
Pos logit contributions | |
oot | +0.125 |
MFT | +0.119 |
タ | +0.111 |
\": | +0.110 |
LM | +0.106 |
Neg logit contributions | |
weet | -0.143 |
orate | -0.110 |
sex | -0.108 |
antly | -0.107 |
iculture | -0.107 |
Token | a |
Token ID | 257 |
Feature activation | +3.87e-02 |
Pos logit contributions | |
oot | +0.034 |
MFT | +0.032 |
タ | +0.031 |
\": | +0.030 |
LM | +0.029 |
Neg logit contributions | |
weet | -0.031 |
orate | -0.023 |
sex | -0.022 |
antly | -0.022 |
iculture | -0.022 |
Token | reasonable |
Token ID | 6397 |
Feature activation | -2.11e-02 |
Pos logit contributions | |
oot | +0.031 |
MFT | +0.030 |
タ | +0.028 |
\": | +0.028 |
heim | +0.027 |
Neg logit contributions | |
weet | -0.025 |
orate | -0.018 |
sex | -0.018 |
antly | -0.017 |
redistributed | -0.017 |
Token | doubt |
Token ID | 4719 |
Feature activation | +4.56e-02 |
Pos logit contributions | |
weet | +0.015 |
orate | +0.011 |
sex | +0.011 |
antly | +0.011 |
iculture | +0.011 |
Neg logit contributions | |
oot | -0.016 |
MFT | -0.015 |
タ | -0.014 |
\": | -0.014 |
LM | -0.014 |
Token | — |
Token ID | 960 |
Feature activation | +1.06e-01 |
Pos logit contributions | |
oot | +0.032 |
MFT | +0.030 |
タ | +0.028 |
\": | +0.028 |
LM | +0.027 |
Neg logit contributions | |
weet | -0.035 |
orate | -0.027 |
sex | -0.026 |
antly | -0.026 |
iculture | -0.026 |
Token | there |
Token ID | 8117 |
Feature activation | +8.56e-02 |
Pos logit contributions | |
oot | +0.071 |
MFT | +0.068 |
タ | +0.063 |
\": | +0.063 |
LM | +0.060 |
Neg logit contributions | |
weet | -0.083 |
orate | -0.064 |
sex | -0.063 |
antly | -0.062 |
iculture | -0.062 |
Token | comes |
Token ID | 2058 |
Feature activation | -1.41e-01 |
Pos logit contributions | |
oot | +0.059 |
MFT | +0.056 |
タ | +0.053 |
\": | +0.052 |
LM | +0.050 |
Neg logit contributions | |
weet | -0.065 |
orate | -0.050 |
sex | -0.049 |
antly | -0.049 |
iculture | -0.048 |
Token | a |
Token ID | 257 |
Feature activation | +1.86e-02 |
Pos logit contributions | |
weet | +0.100 |
orate | +0.074 |
sex | +0.073 |
antly | +0.072 |
iculture | +0.072 |
Neg logit contributions | |
oot | -0.105 |
MFT | -0.100 |
タ | -0.095 |
\": | -0.094 |
LM | -0.090 |
Token | and |
Token ID | 290 |
Feature activation | +6.63e-02 |
Pos logit contributions | |
weet | +0.060 |
orate | +0.045 |
sex | +0.044 |
antly | +0.043 |
iculture | +0.043 |
Neg logit contributions | |
oot | -0.062 |
MFT | -0.059 |
タ | -0.056 |
\": | -0.055 |
LM | -0.053 |
Token | bad |
Token ID | 2089 |
Feature activation | +7.50e-02 |
Pos logit contributions | |
oot | +0.051 |
MFT | +0.049 |
タ | +0.046 |
\": | +0.046 |
heim | +0.044 |
Neg logit contributions | |
weet | -0.045 |
orate | -0.033 |
sex | -0.033 |
antly | -0.032 |
redistributed | -0.032 |
Token | policies |
Token ID | 4788 |
Feature activation | -1.08e-01 |
Pos logit contributions | |
oot | +0.056 |
MFT | +0.053 |
タ | +0.050 |
\": | +0.050 |
LM | +0.048 |
Neg logit contributions | |
weet | -0.053 |
orate | -0.040 |
sex | -0.039 |
iculture | -0.038 |
antly | -0.038 |
Token | , |
Token ID | 11 |
Feature activation | +6.83e-03 |
Pos logit contributions | |
weet | +0.078 |
orate | +0.058 |
sex | +0.057 |
antly | +0.056 |
iculture | +0.056 |
Neg logit contributions | |
oot | -0.079 |
MFT | -0.076 |
タ | -0.071 |
\": | -0.071 |
LM | -0.068 |
Token | or |
Token ID | 393 |
Feature activation | +3.76e-02 |
Pos logit contributions | |
oot | +0.005 |
MFT | +0.005 |
タ | +0.004 |
\": | +0.004 |
LM | +0.004 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
iculture | -0.004 |
Token | be |
Token ID | 307 |
Feature activation | -2.43e-02 |
Pos logit contributions | |
oot | +0.026 |
MFT | +0.025 |
タ | +0.024 |
\": | +0.023 |
LM | +0.022 |
Neg logit contributions | |
weet | -0.028 |
orate | -0.021 |
sex | -0.021 |
antly | -0.021 |
iculture | -0.021 |
Token | reasoned |
Token ID | 37857 |
Feature activation | -1.54e-02 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
iculture | +0.012 |
antly | +0.012 |
Neg logit contributions | |
oot | -0.019 |
MFT | -0.018 |
タ | -0.017 |
\": | -0.017 |
LM | -0.016 |
Token | with |
Token ID | 351 |
Feature activation | +3.47e-02 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.009 |
sex | +0.009 |
iculture | +0.008 |
antly | +0.008 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.010 |
タ | -0.010 |
\": | -0.010 |
LM | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | -9.67e-03 |
Pos logit contributions | |
oot | +0.025 |
MFT | +0.023 |
タ | +0.022 |
\": | +0.022 |
LM | +0.021 |
Neg logit contributions | |
weet | -0.026 |
orate | -0.020 |
sex | -0.019 |
iculture | -0.019 |
antly | -0.019 |
Token |
|
Token ID | 198 |
Feature activation | +6.42e-03 |
Pos logit contributions | |
weet | +0.007 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.008 |
MFT | -0.007 |
タ | -0.007 |
\": | -0.007 |
LM | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | -2.28e-04 |
Pos logit contributions | |
oot | +0.005 |
MFT | +0.005 |
タ | +0.004 |
\": | +0.004 |
LM | +0.004 |
Neg logit contributions | |
weet | -0.004 |
orate | -0.003 |
sex | -0.003 |
antly | -0.003 |
iculture | -0.003 |
Token | numerous |
Token ID | 6409 |
Feature activation | -7.58e-02 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.003 |
タ | +0.003 |
\": | +0.003 |
LM | +0.003 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token | theaters |
Token ID | 20550 |
Feature activation | +6.42e-02 |
Pos logit contributions | |
weet | +0.056 |
orate | +0.042 |
sex | +0.041 |
antly | +0.041 |
iculture | +0.041 |
Neg logit contributions | |
oot | -0.054 |
MFT | -0.052 |
タ | -0.049 |
\": | -0.048 |
heim | -0.047 |
Token | throughout |
Token ID | 3690 |
Feature activation | +7.14e-03 |
Pos logit contributions | |
oot | +0.047 |
MFT | +0.045 |
タ | +0.042 |
\": | +0.042 |
LM | +0.040 |
Neg logit contributions | |
weet | -0.047 |
orate | -0.035 |
sex | -0.034 |
antly | -0.034 |
iculture | -0.034 |
Token | Texas |
Token ID | 3936 |
Feature activation | -1.61e-02 |
Pos logit contributions | |
oot | +0.005 |
MFT | +0.005 |
タ | +0.005 |
\": | +0.005 |
heim | +0.004 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
Trinidad | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | -5.43e-02 |
Pos logit contributions | |
weet | +0.012 |
orate | +0.009 |
sex | +0.009 |
iculture | +0.009 |
antly | +0.009 |
Neg logit contributions | |
oot | -0.012 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | The |
Token ID | 383 |
Feature activation | -8.95e-02 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.028 |
sex | +0.028 |
antly | +0.027 |
iculture | +0.027 |
Neg logit contributions | |
oot | -0.041 |
MFT | -0.039 |
タ | -0.037 |
\": | -0.037 |
LM | -0.035 |
Token | Daniels |
Token ID | 28162 |
Feature activation | -1.60e-01 |
Pos logit contributions | |
weet | +0.064 |
orate | +0.048 |
sex | +0.047 |
antly | +0.046 |
iculture | +0.046 |
Neg logit contributions | |
oot | -0.066 |
MFT | -0.063 |
タ | -0.060 |
\": | -0.059 |
LM | -0.057 |
Token | family |
Token ID | 1641 |
Feature activation | +7.60e-03 |
Pos logit contributions | |
weet | +0.120 |
orate | +0.092 |
sex | +0.090 |
antly | +0.089 |
iculture | +0.089 |
Neg logit contributions | |
oot | -0.112 |
MFT | -0.107 |
タ | -0.101 |
\": | -0.100 |
LM | -0.096 |
Token | owned |
Token ID | 6898 |
Feature activation | +1.88e-02 |
Pos logit contributions | |
oot | +0.006 |
MFT | +0.005 |
タ | +0.005 |
\": | +0.005 |
LM | +0.005 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
iculture | -0.004 |
Token | Se |
Token ID | 1001 |
Feature activation | +4.68e-02 |
Pos logit contributions | |
oot | +0.014 |
MFT | +0.014 |
タ | +0.013 |
\": | +0.013 |
LM | +0.012 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.010 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | gu |
Token ID | 5162 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
oot | +0.023 |
MFT | +0.022 |
タ | +0.020 |
\": | +0.020 |
LM | +0.019 |
Neg logit contributions | |
weet | -0.045 |
orate | -0.036 |
sex | -0.036 |
antly | -0.035 |
iculture | -0.035 |
Token | uggets |
Token ID | 26550 |
Feature activation | -3.75e-02 |
Pos logit contributions | |
weet | +0.085 |
orate | +0.065 |
sex | +0.064 |
iculture | +0.063 |
antly | +0.063 |
Neg logit contributions | |
oot | -0.080 |
MFT | -0.076 |
タ | -0.071 |
\": | -0.071 |
LM | -0.068 |
Token | from |
Token ID | 422 |
Feature activation | -8.88e-03 |
Pos logit contributions | |
weet | +0.028 |
orate | +0.021 |
sex | +0.020 |
antly | +0.020 |
redistributed | +0.020 |
Neg logit contributions | |
oot | -0.027 |
MFT | -0.026 |
タ | -0.024 |
\": | -0.024 |
LM | -0.023 |
Token | a |
Token ID | 257 |
Feature activation | -4.13e-02 |
Pos logit contributions | |
weet | +0.006 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.005 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.006 |
タ | -0.006 |
\": | -0.006 |
LM | -0.005 |
Token | few |
Token ID | 1178 |
Feature activation | -4.13e-02 |
Pos logit contributions | |
weet | +0.031 |
orate | +0.024 |
sex | +0.023 |
antly | +0.023 |
iculture | +0.023 |
Neg logit contributions | |
oot | -0.029 |
MFT | -0.027 |
タ | -0.026 |
\": | -0.026 |
LM | -0.024 |
Token | hours |
Token ID | 2250 |
Feature activation | -1.05e-01 |
Pos logit contributions | |
weet | +0.032 |
orate | +0.024 |
sex | +0.024 |
antly | +0.023 |
iculture | +0.023 |
Neg logit contributions | |
oot | -0.029 |
MFT | -0.027 |
タ | -0.026 |
\": | -0.025 |
LM | -0.024 |
Token | ago |
Token ID | 2084 |
Feature activation | -1.12e-01 |
Pos logit contributions | |
weet | +0.078 |
orate | +0.059 |
sex | +0.058 |
antly | +0.057 |
iculture | +0.057 |
Neg logit contributions | |
oot | -0.075 |
MFT | -0.071 |
タ | -0.067 |
\": | -0.067 |
LM | -0.064 |
Token | , |
Token ID | 11 |
Feature activation | -6.69e-02 |
Pos logit contributions | |
weet | +0.082 |
orate | +0.062 |
sex | +0.061 |
antly | +0.060 |
iculture | +0.060 |
Neg logit contributions | |
oot | -0.080 |
MFT | -0.076 |
タ | -0.072 |
\": | -0.071 |
LM | -0.068 |
Token | on |
Token ID | 319 |
Feature activation | -5.10e-02 |
Pos logit contributions | |
weet | +0.050 |
orate | +0.038 |
sex | +0.037 |
antly | +0.037 |
iculture | +0.037 |
Neg logit contributions | |
oot | -0.047 |
MFT | -0.045 |
タ | -0.042 |
\": | -0.042 |
LM | -0.040 |
Token | Billy |
Token ID | 15890 |
Feature activation | +6.70e-02 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.029 |
sex | +0.028 |
iculture | +0.028 |
antly | +0.028 |
Neg logit contributions | |
oot | -0.036 |
MFT | -0.035 |
タ | -0.033 |
\": | -0.032 |
LM | -0.031 |
Token | Turner |
Token ID | 15406 |
Feature activation | -7.11e-02 |
Pos logit contributions | |
oot | +0.051 |
MFT | +0.048 |
タ | +0.046 |
\": | +0.045 |
LM | +0.043 |
Neg logit contributions | |
weet | -0.047 |
orate | -0.035 |
sex | -0.034 |
iculture | -0.034 |
antly | -0.034 |
Token | , |
Token ID | 11 |
Feature activation | -5.15e-02 |
Pos logit contributions | |
weet | +0.052 |
orate | +0.039 |
sex | +0.039 |
iculture | +0.038 |
antly | +0.038 |
Neg logit contributions | |
oot | -0.051 |
MFT | -0.049 |
タ | -0.046 |
\": | -0.045 |
LM | -0.044 |
Token | trend |
Token ID | 5182 |
Feature activation | +2.14e-01 |
Pos logit contributions | |
weet | +0.038 |
orate | +0.030 |
sex | +0.030 |
antly | +0.030 |
iculture | +0.030 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.020 |
\": | -0.020 |
LM | -0.019 |
Token | ? |
Token ID | 30 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
oot | +0.148 |
MFT | +0.141 |
タ | +0.132 |
\": | +0.131 |
LM | +0.126 |
Neg logit contributions | |
weet | -0.163 |
orate | -0.124 |
sex | -0.122 |
antly | -0.121 |
iculture | -0.121 |
Token | Would |
Token ID | 10928 |
Feature activation | -1.62e-01 |
Pos logit contributions | |
weet | +0.007 |
orate | +0.005 |
sex | +0.005 |
iculture | +0.005 |
antly | +0.005 |
Neg logit contributions | |
oot | -0.008 |
MFT | -0.008 |
タ | -0.008 |
\": | -0.007 |
LM | -0.007 |
Token | you |
Token ID | 345 |
Feature activation | +4.81e-02 |
Pos logit contributions | |
weet | +0.131 |
orate | +0.102 |
sex | +0.100 |
iculture | +0.100 |
antly | +0.100 |
Neg logit contributions | |
oot | -0.104 |
MFT | -0.099 |
タ | -0.092 |
\": | -0.091 |
LM | -0.088 |
Token | ever |
Token ID | 1683 |
Feature activation | +8.59e-02 |
Pos logit contributions | |
oot | +0.035 |
MFT | +0.033 |
タ | +0.031 |
\": | +0.031 |
LM | +0.030 |
Neg logit contributions | |
weet | -0.035 |
orate | -0.026 |
sex | -0.026 |
iculture | -0.026 |
antly | -0.025 |
Token | let |
Token ID | 1309 |
Feature activation | -1.29e-01 |
Pos logit contributions | |
oot | +0.061 |
MFT | +0.058 |
タ | +0.054 |
\": | +0.053 |
LM | +0.052 |
Neg logit contributions | |
weet | -0.064 |
orate | -0.049 |
sex | -0.048 |
antly | -0.048 |
iculture | -0.048 |
Token | your |
Token ID | 534 |
Feature activation | -4.05e-02 |
Pos logit contributions | |
weet | +0.095 |
orate | +0.071 |
sex | +0.070 |
iculture | +0.069 |
antly | +0.069 |
Neg logit contributions | |
oot | -0.093 |
MFT | -0.089 |
タ | -0.083 |
\": | -0.082 |
LM | -0.080 |
Token | daughter |
Token ID | 4957 |
Feature activation | -5.89e-02 |
Pos logit contributions | |
weet | +0.029 |
orate | +0.022 |
sex | +0.021 |
iculture | +0.021 |
antly | +0.021 |
Neg logit contributions | |
oot | -0.030 |
MFT | -0.029 |
タ | -0.027 |
\": | -0.027 |
LM | -0.026 |
Token | take |
Token ID | 1011 |
Feature activation | -1.53e-02 |
Pos logit contributions | |
weet | +0.041 |
orate | +0.031 |
sex | +0.030 |
iculture | +0.030 |
antly | +0.030 |
Neg logit contributions | |
oot | -0.044 |
MFT | -0.042 |
タ | -0.040 |
\": | -0.040 |
LM | -0.038 |
Token | instruction |
Token ID | 12064 |
Feature activation | -2.09e-01 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.008 |
sex | +0.008 |
iculture | +0.008 |
antly | +0.008 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.010 |
タ | -0.010 |
\": | -0.010 |
LM | -0.009 |
Token | from |
Token ID | 422 |
Feature activation | +1.35e-03 |
Pos logit contributions | |
weet | +0.161 |
orate | +0.123 |
sex | +0.121 |
iculture | +0.120 |
antly | +0.120 |
Neg logit contributions | |
oot | -0.143 |
MFT | -0.135 |
タ | -0.127 |
\": | -0.126 |
LM | -0.121 |
Token | of |
Token ID | 286 |
Feature activation | -3.05e-02 |
Pos logit contributions | |
weet | +0.093 |
orate | +0.070 |
sex | +0.069 |
iculture | +0.068 |
antly | +0.068 |
Neg logit contributions | |
oot | -0.088 |
MFT | -0.084 |
タ | -0.079 |
\": | -0.078 |
LM | -0.075 |
Token | overweight |
Token ID | 22652 |
Feature activation | +1.67e-02 |
Pos logit contributions | |
weet | +0.021 |
orate | +0.015 |
sex | +0.015 |
iculture | +0.015 |
antly | +0.015 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.021 |
\": | -0.021 |
LM | -0.020 |
Token | and |
Token ID | 290 |
Feature activation | -4.52e-03 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
タ | +0.011 |
\": | +0.011 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.012 |
orate | -0.009 |
sex | -0.009 |
iculture | -0.009 |
antly | -0.009 |
Token | obesity |
Token ID | 13825 |
Feature activation | -3.36e-02 |
Pos logit contributions | |
weet | +0.003 |
orate | +0.002 |
sex | +0.002 |
antly | +0.002 |
iculture | +0.002 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.003 |
タ | -0.003 |
\": | -0.003 |
LM | -0.003 |
Token | in |
Token ID | 287 |
Feature activation | -3.69e-02 |
Pos logit contributions | |
weet | +0.025 |
orate | +0.019 |
sex | +0.019 |
iculture | +0.018 |
antly | +0.018 |
Neg logit contributions | |
oot | -0.024 |
MFT | -0.023 |
タ | -0.021 |
\": | -0.021 |
LM | -0.020 |
Token | males |
Token ID | 10835 |
Feature activation | -1.38e-01 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.019 |
sex | +0.019 |
iculture | +0.019 |
antly | +0.019 |
Neg logit contributions | |
oot | -0.028 |
MFT | -0.027 |
タ | -0.025 |
\": | -0.025 |
LM | -0.024 |
Token | was |
Token ID | 373 |
Feature activation | -4.26e-02 |
Pos logit contributions | |
weet | +0.099 |
orate | +0.074 |
sex | +0.073 |
iculture | +0.072 |
antly | +0.072 |
Neg logit contributions | |
oot | -0.101 |
MFT | -0.097 |
タ | -0.091 |
\": | -0.090 |
LM | -0.087 |
Token | projected |
Token ID | 13301 |
Feature activation | -8.96e-02 |
Pos logit contributions | |
weet | +0.028 |
orate | +0.021 |
sex | +0.020 |
iculture | +0.020 |
antly | +0.020 |
Neg logit contributions | |
oot | -0.034 |
MFT | -0.032 |
タ | -0.030 |
\": | -0.030 |
LM | -0.029 |
Token | to |
Token ID | 284 |
Feature activation | -1.50e-02 |
Pos logit contributions | |
weet | +0.060 |
orate | +0.044 |
sex | +0.043 |
iculture | +0.043 |
antly | +0.043 |
Neg logit contributions | |
oot | -0.070 |
MFT | -0.067 |
タ | -0.063 |
\": | -0.063 |
LM | -0.061 |
Token | increase |
Token ID | 2620 |
Feature activation | +1.41e-01 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.009 |
sex | +0.008 |
antly | +0.008 |
iculture | +0.008 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.010 |
タ | -0.009 |
\": | -0.009 |
LM | -0.009 |
Token | between |
Token ID | 1022 |
Feature activation | -3.04e-02 |
Pos logit contributions | |
oot | +0.101 |
MFT | +0.096 |
タ | +0.090 |
\": | +0.089 |
LM | +0.086 |
Neg logit contributions | |
weet | -0.104 |
orate | -0.079 |
sex | -0.077 |
iculture | -0.077 |
antly | -0.076 |
Token | we |
Token ID | 356 |
Feature activation | -6.30e-03 |
Pos logit contributions | |
weet | +0.009 |
orate | +0.007 |
sex | +0.007 |
antly | +0.007 |
iculture | +0.007 |
Neg logit contributions | |
oot | -0.009 |
MFT | -0.009 |
タ | -0.008 |
\": | -0.008 |
LM | -0.008 |
Token | risk |
Token ID | 2526 |
Feature activation | -3.73e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.003 |
sex | +0.003 |
antly | +0.003 |
iculture | +0.003 |
Neg logit contributions | |
oot | -0.005 |
MFT | -0.004 |
タ | -0.004 |
\": | -0.004 |
LM | -0.004 |
Token | it |
Token ID | 340 |
Feature activation | -2.25e-02 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.019 |
sex | +0.019 |
antly | +0.019 |
iculture | +0.019 |
Neg logit contributions | |
oot | -0.028 |
MFT | -0.027 |
タ | -0.026 |
\": | -0.025 |
LM | -0.024 |
Token | in |
Token ID | 287 |
Feature activation | +3.69e-03 |
Pos logit contributions | |
weet | +0.016 |
orate | +0.012 |
sex | +0.012 |
antly | +0.012 |
iculture | +0.012 |
Neg logit contributions | |
oot | -0.017 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.014 |
Token | the |
Token ID | 262 |
Feature activation | +9.68e-03 |
Pos logit contributions | |
oot | +0.003 |
MFT | +0.002 |
タ | +0.002 |
\": | +0.002 |
LM | +0.002 |
Neg logit contributions | |
weet | -0.003 |
orate | -0.002 |
sex | -0.002 |
antly | -0.002 |
iculture | -0.002 |
Token | first |
Token ID | 717 |
Feature activation | -7.81e-02 |
Pos logit contributions | |
oot | +0.008 |
MFT | +0.008 |
タ | +0.008 |
\": | +0.008 |
LM | +0.007 |
Neg logit contributions | |
weet | -0.006 |
orate | -0.004 |
sex | -0.004 |
iculture | -0.004 |
antly | -0.004 |
Token | place |
Token ID | 1295 |
Feature activation | +8.00e-02 |
Pos logit contributions | |
weet | +0.071 |
orate | +0.057 |
sex | +0.056 |
antly | +0.056 |
iculture | +0.056 |
Neg logit contributions | |
oot | -0.042 |
MFT | -0.040 |
タ | -0.037 |
\": | -0.036 |
LM | -0.034 |
Token | . |
Token ID | 13 |
Feature activation | -2.95e-02 |
Pos logit contributions | |
oot | +0.058 |
MFT | +0.055 |
タ | +0.052 |
\": | +0.052 |
LM | +0.050 |
Neg logit contributions | |
weet | -0.058 |
orate | -0.044 |
sex | -0.043 |
antly | -0.043 |
iculture | -0.042 |
Token | We |
Token ID | 775 |
Feature activation | -3.27e-02 |
Pos logit contributions | |
weet | +0.019 |
orate | +0.014 |
sex | +0.014 |
iculture | +0.013 |
antly | +0.013 |
Neg logit contributions | |
oot | -0.024 |
MFT | -0.023 |
タ | -0.021 |
\": | -0.021 |
LM | -0.021 |
Token | all |
Token ID | 477 |
Feature activation | -4.63e-03 |
Pos logit contributions | |
weet | +0.024 |
orate | +0.018 |
sex | +0.018 |
antly | +0.018 |
iculture | +0.018 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.021 |
\": | -0.021 |
LM | -0.020 |
Token | need |
Token ID | 761 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
weet | +0.003 |
orate | +0.002 |
sex | +0.002 |
antly | +0.002 |
iculture | +0.002 |
Neg logit contributions | |
oot | -0.004 |
MFT | -0.003 |
タ | -0.003 |
\": | -0.003 |
LM | -0.003 |
Token | oll |
Token ID | 692 |
Feature activation | +1.84e-02 |
Pos logit contributions | |
weet | +0.006 |
orate | +0.005 |
sex | +0.005 |
antly | +0.005 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.006 |
MFT | -0.006 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token | are |
Token ID | 389 |
Feature activation | -4.10e-02 |
Pos logit contributions | |
oot | +0.013 |
MFT | +0.013 |
タ | +0.012 |
\": | +0.012 |
LM | +0.011 |
Neg logit contributions | |
weet | -0.014 |
orate | -0.010 |
sex | -0.010 |
iculture | -0.010 |
antly | -0.010 |
Token | displayed |
Token ID | 9066 |
Feature activation | -1.33e-01 |
Pos logit contributions | |
weet | +0.030 |
orate | +0.022 |
sex | +0.022 |
antly | +0.022 |
iculture | +0.022 |
Neg logit contributions | |
oot | -0.030 |
MFT | -0.029 |
タ | -0.027 |
\": | -0.027 |
LM | -0.026 |
Token | outside |
Token ID | 2354 |
Feature activation | -3.31e-02 |
Pos logit contributions | |
weet | +0.096 |
orate | +0.072 |
sex | +0.071 |
antly | +0.070 |
iculture | +0.070 |
Neg logit contributions | |
oot | -0.096 |
MFT | -0.092 |
タ | -0.087 |
\": | -0.086 |
LM | -0.083 |
Token | the |
Token ID | 262 |
Feature activation | -1.48e-02 |
Pos logit contributions | |
weet | +0.025 |
orate | +0.019 |
sex | +0.018 |
antly | +0.018 |
iculture | +0.018 |
Neg logit contributions | |
oot | -0.023 |
MFT | -0.022 |
タ | -0.021 |
\": | -0.021 |
LM | -0.020 |
Token | Maj |
Token ID | 12390 |
Feature activation | -3.47e-01 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.008 |
sex | +0.008 |
antly | +0.008 |
iculture | +0.008 |
Neg logit contributions | |
oot | -0.010 |
MFT | -0.010 |
タ | -0.009 |
\": | -0.009 |
LM | -0.009 |
Token | uro |
Token ID | 1434 |
Feature activation | +7.11e-02 |
Pos logit contributions | |
weet | +0.286 |
orate | +0.223 |
sex | +0.219 |
iculture | +0.217 |
antly | +0.217 |
Neg logit contributions | |
oot | -0.218 |
MFT | -0.210 |
タ | -0.195 |
\": | -0.193 |
LM | -0.184 |
Token | Water |
Token ID | 5638 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
oot | +0.051 |
MFT | +0.049 |
タ | +0.046 |
\": | +0.045 |
LM | +0.044 |
Neg logit contributions | |
weet | -0.052 |
orate | -0.039 |
sex | -0.039 |
antly | -0.038 |
iculture | -0.038 |
Token | and |
Token ID | 290 |
Feature activation | -4.12e-02 |
Pos logit contributions | |
oot | +0.074 |
MFT | +0.070 |
タ | +0.066 |
\": | +0.065 |
LM | +0.063 |
Neg logit contributions | |
weet | -0.079 |
orate | -0.060 |
sex | -0.059 |
iculture | -0.058 |
antly | -0.058 |
Token | Sew |
Token ID | 27347 |
Feature activation | -1.55e-01 |
Pos logit contributions | |
weet | +0.026 |
orate | +0.019 |
sex | +0.018 |
antly | +0.018 |
iculture | +0.018 |
Neg logit contributions | |
oot | -0.034 |
MFT | -0.032 |
タ | -0.031 |
\": | -0.030 |
LM | -0.029 |
Token | er |
Token ID | 263 |
Feature activation | +1.36e-01 |
Pos logit contributions | |
weet | +0.138 |
orate | +0.110 |
sex | +0.109 |
antly | +0.108 |
iculture | +0.107 |
Neg logit contributions | |
oot | -0.088 |
MFT | -0.083 |
タ | -0.076 |
\": | -0.075 |
LM | -0.071 |
Token | described |
Token ID | 3417 |
Feature activation | -3.89e-02 |
Pos logit contributions | |
weet | +0.025 |
orate | +0.018 |
sex | +0.018 |
iculture | +0.018 |
antly | +0.018 |
Neg logit contributions | |
oot | -0.027 |
MFT | -0.026 |
タ | -0.025 |
\": | -0.024 |
LM | -0.024 |
Token | as |
Token ID | 355 |
Feature activation | -1.97e-02 |
Pos logit contributions | |
weet | +0.029 |
orate | +0.022 |
sex | +0.022 |
antly | +0.022 |
iculture | +0.021 |
Neg logit contributions | |
oot | -0.027 |
MFT | -0.026 |
タ | -0.025 |
\": | -0.024 |
LM | -0.023 |
Token | a |
Token ID | 257 |
Feature activation | -1.03e-02 |
Pos logit contributions | |
weet | +0.014 |
orate | +0.011 |
sex | +0.011 |
antly | +0.010 |
iculture | +0.010 |
Neg logit contributions | |
oot | -0.014 |
MFT | -0.014 |
タ | -0.013 |
\": | -0.013 |
LM | -0.012 |
Token | " |
Token ID | 366 |
Feature activation | -4.37e-02 |
Pos logit contributions | |
weet | +0.008 |
orate | +0.006 |
sex | +0.006 |
antly | +0.006 |
iculture | +0.006 |
Neg logit contributions | |
oot | -0.007 |
MFT | -0.007 |
タ | -0.007 |
\": | -0.007 |
LM | -0.006 |
Token | real |
Token ID | 5305 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
weet | +0.032 |
orate | +0.024 |
sex | +0.024 |
antly | +0.024 |
iculture | +0.024 |
Neg logit contributions | |
oot | -0.031 |
MFT | -0.030 |
タ | -0.028 |
\": | -0.028 |
LM | -0.027 |
Token | blight |
Token ID | 42514 |
Feature activation | -3.62e-01 |
Pos logit contributions | |
weet | +0.108 |
orate | +0.082 |
sex | +0.081 |
antly | +0.080 |
iculture | +0.080 |
Neg logit contributions | |
oot | -0.106 |
MFT | -0.101 |
タ | -0.095 |
\": | -0.094 |
LM | -0.091 |
Token | on |
Token ID | 319 |
Feature activation | -1.53e-02 |
Pos logit contributions | |
weet | +0.277 |
orate | +0.212 |
sex | +0.208 |
iculture | +0.206 |
antly | +0.205 |
Neg logit contributions | |
oot | -0.249 |
MFT | -0.239 |
タ | -0.222 |
\": | -0.222 |
LM | -0.211 |
Token | the |
Token ID | 262 |
Feature activation | +7.19e-03 |
Pos logit contributions | |
weet | +0.011 |
orate | +0.008 |
sex | +0.008 |
antly | +0.008 |
iculture | +0.008 |
Neg logit contributions | |
oot | -0.011 |
MFT | -0.011 |
タ | -0.010 |
\": | -0.010 |
LM | -0.010 |
Token | area |
Token ID | 1989 |
Feature activation | +1.25e-01 |
Pos logit contributions | |
oot | +0.005 |
MFT | +0.005 |
タ | +0.004 |
\": | +0.004 |
heim | +0.004 |
Neg logit contributions | |
weet | -0.005 |
orate | -0.004 |
sex | -0.004 |
antly | -0.004 |
iculture | -0.004 |
Token | ". |
Token ID | 1911 |
Feature activation | +6.28e-02 |
Pos logit contributions | |
oot | +0.080 |
MFT | +0.076 |
タ | +0.070 |
\": | +0.070 |
LM | +0.067 |
Neg logit contributions | |
weet | -0.102 |
orate | -0.080 |
sex | -0.078 |
iculture | -0.078 |
antly | -0.077 |
Token |
|
Token ID | 198 |
Feature activation | -2.78e-02 |
Pos logit contributions | |
oot | +0.046 |
MFT | +0.044 |
タ | +0.041 |
\": | +0.041 |
LM | +0.040 |
Neg logit contributions | |
weet | -0.045 |
orate | -0.034 |
sex | -0.033 |
antly | -0.033 |
iculture | -0.033 |
Token | . |
Token ID | 13 |
Feature activation | -4.74e-02 |
Pos logit contributions | |
weet | +0.005 |
orate | +0.004 |
sex | +0.004 |
antly | +0.004 |
iculture | +0.004 |
Neg logit contributions | |
oot | -0.005 |
MFT | -0.005 |
タ | -0.005 |
\": | -0.005 |
LM | -0.005 |
Token |
|
Token ID | 198 |
Feature activation | -4.88e-02 |
Pos logit contributions | |
weet | +0.034 |
orate | +0.025 |
sex | +0.025 |
antly | +0.024 |
iculture | +0.024 |
Neg logit contributions | |
oot | -0.035 |
MFT | -0.034 |
タ | -0.032 |
\": | -0.031 |
LM | -0.030 |
Token |
|
Token ID | 198 |
Feature activation | -5.33e-02 |
Pos logit contributions | |
weet | +0.034 |
orate | +0.025 |
sex | +0.025 |
antly | +0.025 |
iculture | +0.025 |
Neg logit contributions | |
oot | -0.037 |
MFT | -0.035 |
タ | -0.033 |
\": | -0.033 |
LM | -0.032 |
Token | HOW |
Token ID | 37181 |
Feature activation | +5.24e-02 |
Pos logit contributions | |
weet | +0.041 |
orate | +0.031 |
sex | +0.031 |
antly | +0.030 |
iculture | +0.030 |
Neg logit contributions | |
oot | -0.037 |
MFT | -0.035 |
タ | -0.033 |
\": | -0.032 |
LM | -0.031 |
Token | TO |
Token ID | 5390 |
Feature activation | -8.89e-02 |
Pos logit contributions | |
oot | +0.042 |
MFT | +0.040 |
\": | +0.038 |
タ | +0.038 |
LM | +0.037 |
Neg logit contributions | |
weet | -0.034 |
orate | -0.025 |
iculture | -0.024 |
sex | -0.024 |
redistributed | -0.024 |
Token | INST |
Token ID | 40589 |
Feature activation | -4.05e-01 |
Pos logit contributions | |
weet | +0.056 |
orate | +0.040 |
sex | +0.039 |
antly | +0.038 |
iculture | +0.038 |
Neg logit contributions | |
oot | -0.073 |
MFT | -0.070 |
タ | -0.067 |
\": | -0.066 |
LM | -0.064 |
Token | ALL |
Token ID | 7036 |
Feature activation | -1.13e-01 |
Pos logit contributions | |
weet | +0.263 |
orate | +0.190 |
sex | +0.187 |
antly | +0.184 |
iculture | +0.183 |
Neg logit contributions | |
oot | -0.325 |
MFT | -0.312 |
タ | -0.295 |
\": | -0.293 |
LM | -0.283 |
Token | NET |
Token ID | 30502 |
Feature activation | +1.69e-01 |
Pos logit contributions | |
weet | +0.096 |
orate | +0.076 |
sex | +0.075 |
iculture | +0.074 |
antly | +0.074 |
Neg logit contributions | |
oot | -0.069 |
MFT | -0.065 |
タ | -0.060 |
\": | -0.060 |
LM | -0.057 |
Token | FL |
Token ID | 3697 |
Feature activation | -6.30e-02 |
Pos logit contributions | |
oot | +0.102 |
MFT | +0.096 |
タ | +0.089 |
\": | +0.088 |
LM | +0.085 |
Neg logit contributions | |
weet | -0.143 |
orate | -0.114 |
antly | -0.111 |
sex | -0.111 |
iculture | -0.111 |
Token | IX |
Token ID | 10426 |
Feature activation | -1.38e-01 |
Pos logit contributions | |
weet | +0.043 |
orate | +0.031 |
sex | +0.031 |
antly | +0.030 |
iculture | +0.030 |
Neg logit contributions | |
oot | -0.049 |
MFT | -0.047 |
タ | -0.044 |
\": | -0.044 |
LM | -0.042 |
Token | ON |
Token ID | 6177 |
Feature activation | -1.53e-01 |
Pos logit contributions | |
weet | +0.083 |
orate | +0.058 |
sex | +0.057 |
antly | +0.056 |
iculture | +0.056 |
Neg logit contributions | |
oot | -0.117 |
MFT | -0.112 |
タ | -0.107 |
\": | -0.106 |
LM | -0.103 |
Token | Courtesy |
Token ID | 22984 |
Feature activation | -8.31e-02 |
Pos logit contributions | |
weet | +0.044 |
orate | +0.033 |
sex | +0.032 |
antly | +0.032 |
iculture | +0.032 |
Neg logit contributions | |
oot | -0.047 |
MFT | -0.044 |
タ | -0.042 |
\": | -0.042 |
LM | -0.040 |
Token | Everett |
Token ID | 36815 |
Feature activation | -7.55e-02 |
Pos logit contributions | |
weet | +0.059 |
orate | +0.044 |
sex | +0.044 |
antly | +0.043 |
iculture | +0.043 |
Neg logit contributions | |
oot | -0.062 |
MFT | -0.059 |
タ | -0.055 |
\": | -0.055 |
LM | -0.053 |
Token | Collection |
Token ID | 12251 |
Feature activation | +1.25e-02 |
Pos logit contributions | |
weet | +0.059 |
orate | +0.046 |
sex | +0.045 |
antly | +0.044 |
iculture | +0.044 |
Neg logit contributions | |
oot | -0.051 |
MFT | -0.048 |
タ | -0.045 |
\": | -0.044 |
LM | -0.043 |
Token | Amy |
Token ID | 14235 |
Feature activation | +7.52e-02 |
Pos logit contributions | |
oot | +0.009 |
MFT | +0.009 |
タ | +0.008 |
\": | +0.008 |
LM | +0.008 |
Neg logit contributions | |
weet | -0.009 |
orate | -0.007 |
sex | -0.007 |
antly | -0.006 |
iculture | -0.006 |
Token | Sed |
Token ID | 22710 |
Feature activation | +8.47e-02 |
Pos logit contributions | |
oot | +0.046 |
MFT | +0.043 |
タ | +0.040 |
\": | +0.040 |
LM | +0.038 |
Neg logit contributions | |
weet | -0.064 |
orate | -0.050 |
sex | -0.049 |
antly | -0.049 |
iculture | -0.049 |
Token | aris |
Token ID | 20066 |
Feature activation | -3.42e-01 |
Pos logit contributions | |
oot | +0.088 |
MFT | +0.085 |
タ | +0.082 |
\": | +0.082 |
LM | +0.079 |
Neg logit contributions | |
weet | -0.035 |
orate | -0.019 |
sex | -0.019 |
iculture | -0.018 |
antly | -0.018 |
Token | in |
Token ID | 287 |
Feature activation | -2.28e-02 |
Pos logit contributions | |
weet | +0.249 |
orate | +0.185 |
sex | +0.183 |
antly | +0.181 |
redistributed | +0.180 |
Neg logit contributions | |
oot | -0.250 |
MFT | -0.238 |
タ | -0.224 |
\": | -0.221 |
LM | -0.213 |
Token | Str |
Token ID | 4285 |
Feature activation | -6.19e-02 |
Pos logit contributions | |
weet | +0.017 |
orate | +0.012 |
sex | +0.012 |
antly | +0.012 |
iculture | +0.012 |
Neg logit contributions | |
oot | -0.017 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.014 |
Token | angers |
Token ID | 6606 |
Feature activation | +7.22e-02 |
Pos logit contributions | |
weet | +0.048 |
orate | +0.037 |
sex | +0.036 |
antly | +0.036 |
iculture | +0.036 |
Neg logit contributions | |
oot | -0.042 |
MFT | -0.040 |
タ | -0.038 |
\": | -0.037 |
LM | -0.036 |
Token | with |
Token ID | 351 |
Feature activation | +2.81e-02 |
Pos logit contributions | |
oot | +0.052 |
MFT | +0.050 |
タ | +0.047 |
\": | +0.046 |
LM | +0.045 |
Neg logit contributions | |
weet | -0.053 |
orate | -0.040 |
sex | -0.039 |
antly | -0.039 |
iculture | -0.039 |
Token | Candy |
Token ID | 24680 |
Feature activation | -2.61e-01 |
Pos logit contributions | |
oot | +0.020 |
MFT | +0.019 |
タ | +0.018 |
\": | +0.017 |
LM | +0.017 |
Neg logit contributions | |
weet | -0.021 |
orate | -0.016 |
sex | -0.016 |
antly | -0.016 |
iculture | -0.016 |
Token | load |
Token ID | 2220 |
Feature activation | +1.98e-01 |
Pos logit contributions | |
oot | +0.059 |
MFT | +0.056 |
タ | +0.051 |
\": | +0.051 |
LM | +0.048 |
Neg logit contributions | |
weet | -0.090 |
orate | -0.072 |
sex | -0.071 |
antly | -0.070 |
iculture | -0.070 |
Token | With |
Token ID | 3152 |
Feature activation | -2.73e-02 |
Pos logit contributions | |
oot | +0.152 |
MFT | +0.146 |
タ | +0.137 |
\": | +0.136 |
LM | +0.131 |
Neg logit contributions | |
weet | -0.136 |
orate | -0.100 |
sex | -0.098 |
iculture | -0.097 |
antly | -0.097 |
Token | Part |
Token ID | 7841 |
Feature activation | +7.75e-02 |
Pos logit contributions | |
weet | +0.023 |
orate | +0.018 |
sex | +0.018 |
iculture | +0.017 |
antly | +0.017 |
Neg logit contributions | |
oot | -0.017 |
MFT | -0.016 |
タ | -0.015 |
\": | -0.015 |
LM | -0.014 |
Token | ial |
Token ID | 498 |
Feature activation | +5.95e-02 |
Pos logit contributions | |
oot | +0.051 |
MFT | +0.048 |
タ | +0.045 |
\": | +0.044 |
LM | +0.042 |
Neg logit contributions | |
weet | -0.062 |
orate | -0.048 |
sex | -0.047 |
antly | -0.047 |
iculture | -0.047 |
Token | Name |
Token ID | 5376 |
Feature activation | +1.72e-02 |
Pos logit contributions | |
oot | +0.043 |
MFT | +0.040 |
タ | +0.038 |
\": | +0.038 |
LM | +0.036 |
Neg logit contributions | |
weet | -0.044 |
orate | -0.033 |
sex | -0.033 |
antly | -0.032 |
iculture | -0.032 |
Token | (' |
Token ID | 10786 |
Feature activation | -3.40e-01 |
Pos logit contributions | |
oot | +0.017 |
MFT | +0.016 |
タ | +0.016 |
\": | +0.016 |
LM | +0.015 |
Neg logit contributions | |
weet | -0.008 |
orate | -0.005 |
sex | -0.005 |
iculture | -0.005 |
antly | -0.005 |
Token | system |
Token ID | 10057 |
Feature activation | +5.22e-02 |
Pos logit contributions | |
weet | +0.268 |
orate | +0.206 |
sex | +0.205 |
antly | +0.202 |
iculture | +0.202 |
Neg logit contributions | |
oot | -0.225 |
MFT | -0.215 |
タ | -0.200 |
\": | -0.199 |
LM | -0.190 |
Token | . |
Token ID | 13 |
Feature activation | -4.02e-02 |
Pos logit contributions | |
oot | +0.037 |
MFT | +0.036 |
タ | +0.034 |
\": | +0.033 |
LM | +0.032 |
Neg logit contributions | |
weet | -0.039 |
orate | -0.029 |
sex | -0.029 |
antly | -0.028 |
iculture | -0.028 |
Token | windows |
Token ID | 28457 |
Feature activation | +1.69e-02 |
Pos logit contributions | |
weet | +0.027 |
orate | +0.020 |
sex | +0.020 |
iculture | +0.019 |
antly | +0.019 |
Neg logit contributions | |
oot | -0.031 |
MFT | -0.030 |
タ | -0.028 |
\": | -0.028 |
LM | -0.027 |
Token | . |
Token ID | 13 |
Feature activation | -2.86e-02 |
Pos logit contributions | |
oot | +0.012 |
MFT | +0.011 |
タ | +0.011 |
\": | +0.011 |
LM | +0.010 |
Neg logit contributions | |
weet | -0.013 |
orate | -0.010 |
sex | -0.009 |
antly | -0.009 |
iculture | -0.009 |
Token | forms |
Token ID | 23914 |
Feature activation | +8.61e-02 |
Pos logit contributions | |
weet | +0.022 |
orate | +0.016 |
sex | +0.016 |
iculture | +0.016 |
antly | +0.016 |
Neg logit contributions | |
oot | -0.020 |
MFT | -0.019 |
タ | -0.018 |
\": | -0.018 |
LM | -0.017 |