Token | , |
Token ID | 11 |
Feature activation | +4.14e-01 |
Pos logit contributions | |
oral | +0.002 |
?'" | +0.002 |
adies | +0.002 |
moral | +0.002 |
?" | +0.002 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Mine | -0.002 |
Carl | -0.002 |
fought | -0.002 |
Token | Jane |
Token ID | 12091 |
Feature activation | -6.68e-01 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.008 |
Carl | +0.008 |
Mine | +0.008 |
fought | +0.008 |
Neg logit contributions | |
oral | -0.011 |
?'" | -0.011 |
adies | -0.011 |
moral | -0.010 |
Fore | -0.010 |
Token | y |
Token ID | 88 |
Feature activation | +1.06e+00 |
Pos logit contributions | |
oral | +0.015 |
adies | +0.015 |
Fore | +0.014 |
?'" | +0.014 |
moral | +0.014 |
Neg logit contributions | |
tug | -0.017 |
fought | -0.016 |
whe | -0.016 |
ci | -0.015 |
Carl | -0.015 |
Token | and |
Token ID | 290 |
Feature activation | +1.95e+00 |
Pos logit contributions | |
tug | +0.025 |
fought | +0.024 |
ci | +0.022 |
Carl | +0.022 |
skate | +0.022 |
Neg logit contributions | |
adies | -0.026 |
oral | -0.026 |
Fore | -0.025 |
?'" | -0.024 |
Him | -0.024 |
Token | Mrs |
Token ID | 9074 |
Feature activation | +2.69e+00 |
Pos logit contributions | |
tug | +0.057 |
whe | +0.056 |
Mine | +0.052 |
iled | +0.052 |
Carl | +0.052 |
Neg logit contributions | |
oral | -0.036 |
?'" | -0.035 |
moral | -0.034 |
?" | -0.033 |
invites | -0.033 |
Token | . |
Token ID | 13 |
Feature activation | +4.53e+00 |
Pos logit contributions | |
tug | +0.073 |
whe | +0.066 |
fought | +0.063 |
skate | +0.062 |
Mine | +0.062 |
Neg logit contributions | |
?'" | -0.059 |
oral | -0.058 |
Him | -0.056 |
| -0.056 |
?" | -0.055 |
Token | Smith |
Token ID | 4176 |
Feature activation | +1.65e+00 |
Pos logit contributions | |
tug | +0.115 |
whe | +0.111 |
fought | +0.102 |
Mine | +0.102 |
skate | +0.099 |
Neg logit contributions | |
oral | -0.105 |
?'" | -0.102 |
moral | -0.096 |
| -0.096 |
?" | -0.095 |
Token | would |
Token ID | 561 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
tug | +0.058 |
whe | +0.057 |
Mine | +0.056 |
arrass | +0.055 |
Carl | +0.054 |
Neg logit contributions | |
oral | -0.020 |
?'" | -0.019 |
?" | -0.018 |
moral | -0.017 |
invites | -0.017 |
Token | chat |
Token ID | 8537 |
Feature activation | +8.73e-01 |
Pos logit contributions | |
oral | +0.026 |
?'" | +0.026 |
?" | +0.025 |
moral | +0.025 |
invites | +0.025 |
Neg logit contributions | |
tug | -0.030 |
whe | -0.029 |
Carl | -0.028 |
Mine | -0.028 |
arrass | -0.027 |
Token | and |
Token ID | 290 |
Feature activation | +2.82e-01 |
Pos logit contributions | |
tug | +0.019 |
whe | +0.018 |
fought | +0.017 |
Carl | +0.017 |
Mine | +0.017 |
Neg logit contributions | |
oral | -0.022 |
?'" | -0.022 |
moral | -0.021 |
?" | -0.021 |
Prin | -0.021 |
Token | share |
Token ID | 2648 |
Feature activation | -6.58e-02 |
Pos logit contributions | |
whe | +0.006 |
tug | +0.006 |
Mine | +0.006 |
iled | +0.006 |
arrass | +0.006 |
Neg logit contributions | |
invites | -0.007 |
?'" | -0.007 |
oral | -0.007 |
?" | -0.007 |
decides | -0.007 |
Token | forever |
Token ID | 8097 |
Feature activation | -6.22e-01 |
Pos logit contributions | |
?'" | +0.020 |
oral | +0.020 |
| +0.020 |
?" | +0.020 |
Fore | +0.019 |
Neg logit contributions | |
tug | -0.031 |
skate | -0.030 |
whe | -0.030 |
Mine | -0.029 |
arrass | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | +1.51e+00 |
Pos logit contributions | |
oral | +0.013 |
moral | +0.013 |
| +0.012 |
?'" | +0.012 |
invites | +0.012 |
Neg logit contributions | |
Mine | -0.016 |
whe | -0.016 |
tug | -0.016 |
oa | -0.015 |
Kenny | -0.015 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +2.76e-01 |
Pos logit contributions | |
whe | +0.044 |
Carl | +0.043 |
Mine | +0.043 |
tug | +0.043 |
Andy | +0.041 |
Neg logit contributions | |
adies | -0.028 |
oral | -0.026 |
?'" | -0.025 |
?" | -0.025 |
moral | -0.024 |
Token | On |
Token ID | 2202 |
Feature activation | +1.43e+00 |
Pos logit contributions | |
whe | +0.008 |
Carl | +0.008 |
Mine | +0.008 |
Kenny | +0.008 |
Andy | +0.008 |
Neg logit contributions | |
oral | -0.005 |
Him | -0.005 |
adies | -0.005 |
moral | -0.004 |
decides | -0.004 |
Token | a |
Token ID | 257 |
Feature activation | +2.63e+00 |
Pos logit contributions | |
tug | +0.026 |
Carl | +0.025 |
whe | +0.024 |
Andy | +0.023 |
Mine | +0.023 |
Neg logit contributions | |
oral | -0.042 |
adies | -0.040 |
?'" | -0.039 |
decides | -0.039 |
invites | -0.038 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.50e+00 |
Pos logit contributions | |
tug | +0.059 |
whe | +0.056 |
Carl | +0.054 |
Mine | +0.053 |
fought | +0.051 |
Neg logit contributions | |
oral | -0.067 |
?'" | -0.065 |
adies | -0.064 |
moral | -0.062 |
Fore | -0.061 |
Token | day |
Token ID | 1110 |
Feature activation | +1.37e+00 |
Pos logit contributions | |
tug | +0.122 |
whe | +0.118 |
Mine | +0.116 |
Carl | +0.113 |
fought | +0.112 |
Neg logit contributions | |
oral | -0.090 |
?'" | -0.087 |
?" | -0.086 |
moral | -0.085 |
adies | -0.083 |
Token | , |
Token ID | 11 |
Feature activation | +1.22e+00 |
Pos logit contributions | |
tug | +0.036 |
whe | +0.035 |
arrass | +0.034 |
Mine | +0.034 |
fought | +0.034 |
Neg logit contributions | |
oral | -0.028 |
?" | -0.028 |
?'" | -0.027 |
adies | -0.027 |
| -0.027 |
Token | a |
Token ID | 257 |
Feature activation | +1.14e+00 |
Pos logit contributions | |
tug | +0.029 |
whe | +0.029 |
Mine | +0.028 |
Carl | +0.027 |
fought | +0.027 |
Neg logit contributions | |
oral | -0.028 |
?'" | -0.027 |
adies | -0.027 |
?" | -0.026 |
moral | -0.026 |
Token | mom |
Token ID | 1995 |
Feature activation | -5.59e-01 |
Pos logit contributions | |
tug | +0.033 |
whe | +0.033 |
Carl | +0.032 |
Mine | +0.032 |
fought | +0.031 |
Neg logit contributions | |
oral | -0.021 |
?'" | -0.019 |
moral | -0.019 |
?" | -0.018 |
Fore | -0.018 |
Token | my |
Token ID | 1820 |
Feature activation | +4.86e-02 |
Pos logit contributions | |
oral | +0.013 |
adies | +0.012 |
moral | +0.012 |
?'" | +0.012 |
Fore | +0.012 |
Neg logit contributions | |
tug | -0.014 |
whe | -0.013 |
fought | -0.013 |
cream | -0.013 |
urt | -0.012 |
Token | together |
Token ID | 1978 |
Feature activation | +2.29e-01 |
Pos logit contributions | |
tug | +0.000 |
Carl | +0.000 |
arrass | +0.000 |
Mine | +0.000 |
whe | +0.000 |
Neg logit contributions | |
?" | -0.000 |
?'" | -0.000 |
invites | -0.000 |
oral | -0.000 |
| -0.000 |
Token | . |
Token ID | 13 |
Feature activation | +8.89e-01 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.005 |
Carl | +0.005 |
Mine | +0.005 |
arrass | +0.005 |
Neg logit contributions | |
?'" | -0.005 |
?" | -0.005 |
oral | -0.005 |
moral | -0.005 |
invites | -0.005 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +9.86e-02 |
Pos logit contributions | |
tug | +0.027 |
whe | +0.027 |
Carl | +0.026 |
Mine | +0.026 |
fought | +0.025 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.014 |
adies | -0.014 |
moral | -0.013 |
?" | -0.013 |
Token | On |
Token ID | 2202 |
Feature activation | +1.33e+00 |
Pos logit contributions | |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
Kenny | +0.002 |
tug | +0.002 |
Neg logit contributions | |
oral | -0.002 |
Him | -0.002 |
adies | -0.002 |
decides | -0.002 |
moral | -0.002 |
Token | a |
Token ID | 257 |
Feature activation | +2.69e+00 |
Pos logit contributions | |
tug | +0.024 |
Carl | +0.023 |
whe | +0.022 |
Andy | +0.022 |
Mine | +0.022 |
Neg logit contributions | |
oral | -0.039 |
adies | -0.038 |
decides | -0.037 |
?'" | -0.037 |
invites | -0.036 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.44e+00 |
Pos logit contributions | |
tug | +0.066 |
whe | +0.061 |
Carl | +0.059 |
Mine | +0.058 |
skate | +0.057 |
Neg logit contributions | |
oral | -0.065 |
adies | -0.062 |
?'" | -0.061 |
decides | -0.059 |
moral | -0.058 |
Token | day |
Token ID | 1110 |
Feature activation | +1.39e+00 |
Pos logit contributions | |
tug | +0.112 |
whe | +0.108 |
Carl | +0.103 |
Mine | +0.103 |
fought | +0.100 |
Neg logit contributions | |
oral | -0.100 |
?'" | -0.096 |
adies | -0.093 |
moral | -0.092 |
?" | -0.091 |
Token | , |
Token ID | 11 |
Feature activation | +1.20e+00 |
Pos logit contributions | |
tug | +0.033 |
whe | +0.032 |
Mine | +0.030 |
Carl | +0.030 |
fought | +0.029 |
Neg logit contributions | |
oral | -0.033 |
?'" | -0.032 |
?" | -0.032 |
adies | -0.031 |
moral | -0.031 |
Token | Joe |
Token ID | 5689 |
Feature activation | +2.64e-01 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.026 |
Mine | +0.024 |
Carl | +0.024 |
fought | +0.023 |
Neg logit contributions | |
oral | -0.031 |
?'" | -0.030 |
adies | -0.029 |
moral | -0.029 |
?" | -0.029 |
Token | decided |
Token ID | 3066 |
Feature activation | -4.76e-01 |
Pos logit contributions | |
tug | +0.005 |
fought | +0.005 |
iled | +0.005 |
baseball | +0.005 |
skate | +0.004 |
Neg logit contributions | |
oral | -0.008 |
Fore | -0.007 |
moral | -0.007 |
narrative | -0.007 |
adies | -0.007 |
Token | he |
Token ID | 339 |
Feature activation | -6.97e-01 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.013 |
adies | +0.012 |
?" | +0.012 |
moral | +0.012 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.009 |
Carl | -0.009 |
Mine | -0.009 |
fought | -0.008 |
Token | end |
Token ID | 886 |
Feature activation | -1.53e-01 |
Pos logit contributions | |
oral | +0.004 |
moral | +0.004 |
?'" | +0.004 |
Fore | +0.003 |
adies | +0.003 |
Neg logit contributions | |
tug | -0.016 |
whe | -0.016 |
Carl | -0.015 |
Mine | -0.015 |
fought | -0.015 |
Token | , |
Token ID | 11 |
Feature activation | -2.33e-01 |
Pos logit contributions | |
oral | +0.004 |
?'" | +0.004 |
adies | +0.004 |
moral | +0.004 |
Fore | +0.004 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.003 |
Carl | -0.003 |
Mine | -0.003 |
fought | -0.003 |
Token | Tom |
Token ID | 4186 |
Feature activation | -2.44e-01 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.006 |
adies | +0.006 |
moral | +0.006 |
?" | +0.006 |
Neg logit contributions | |
tug | -0.004 |
whe | -0.004 |
Carl | -0.004 |
Mine | -0.004 |
fought | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | +2.02e+00 |
Pos logit contributions | |
oral | +0.006 |
?'" | +0.006 |
adies | +0.006 |
Fore | +0.005 |
moral | +0.005 |
Neg logit contributions | |
tug | -0.006 |
fought | -0.005 |
whe | -0.005 |
Carl | -0.005 |
Mine | -0.005 |
Token | Mr |
Token ID | 1770 |
Feature activation | +1.15e+00 |
Pos logit contributions | |
tug | +0.053 |
whe | +0.052 |
Mine | +0.049 |
Carl | +0.049 |
fought | +0.048 |
Neg logit contributions | |
oral | -0.043 |
?'" | -0.042 |
moral | -0.040 |
adies | -0.040 |
?" | -0.040 |
Token | . |
Token ID | 13 |
Feature activation | +4.35e+00 |
Pos logit contributions | |
tug | +0.030 |
whe | +0.029 |
skate | +0.026 |
Mine | +0.026 |
drib | +0.026 |
Neg logit contributions | |
oral | -0.025 |
?'" | -0.025 |
| -0.025 |
?" | -0.024 |
Him | -0.024 |
Token | Fox |
Token ID | 5426 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
tug | +0.120 |
whe | +0.119 |
fought | +0.108 |
Mine | +0.107 |
arrass | +0.107 |
Neg logit contributions | |
oral | -0.090 |
?'" | -0.087 |
moral | -0.085 |
| -0.083 |
invites | -0.082 |
Token | stopped |
Token ID | 5025 |
Feature activation | +3.53e-01 |
Pos logit contributions | |
whe | +0.044 |
Mine | +0.043 |
tug | +0.043 |
arrass | +0.042 |
urt | +0.041 |
Neg logit contributions | |
oral | -0.030 |
decides | -0.029 |
invites | -0.029 |
?" | -0.029 |
?'" | -0.028 |
Token | being |
Token ID | 852 |
Feature activation | +1.80e+00 |
Pos logit contributions | |
whe | +0.008 |
Mine | +0.007 |
arrass | +0.007 |
cream | +0.007 |
hy | +0.006 |
Neg logit contributions | |
?" | -0.009 |
moral | -0.009 |
| -0.009 |
?'" | -0.009 |
decides | -0.009 |
Token | friends |
Token ID | 2460 |
Feature activation | -1.83e-01 |
Pos logit contributions | |
whe | +0.049 |
tug | +0.048 |
urt | +0.046 |
skate | +0.045 |
etti | +0.045 |
Neg logit contributions | |
?" | -0.039 |
invites | -0.034 |
?'" | -0.033 |
meets | -0.033 |
decides | -0.032 |
Token | because |
Token ID | 780 |
Feature activation | +7.35e-01 |
Pos logit contributions | |
?'" | +0.004 |
?" | +0.004 |
oral | +0.003 |
invites | +0.003 |
decides | +0.003 |
Neg logit contributions | |
tug | -0.005 |
whe | -0.005 |
Carl | -0.005 |
urt | -0.005 |
Mine | -0.005 |
Token | happy |
Token ID | 3772 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
tug | +0.014 |
whe | +0.013 |
Carl | +0.013 |
Mine | +0.013 |
fought | +0.012 |
Neg logit contributions | |
oral | -0.014 |
?'" | -0.014 |
moral | -0.014 |
?" | -0.013 |
Fore | -0.013 |
Token | ! |
Token ID | 0 |
Feature activation | +8.27e-01 |
Pos logit contributions | |
tug | +0.038 |
whe | +0.036 |
Mine | +0.035 |
fought | +0.035 |
Laser | +0.034 |
Neg logit contributions | |
?" | -0.034 |
?'" | -0.032 |
oral | -0.032 |
invites | -0.031 |
| -0.031 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +4.79e-02 |
Pos logit contributions | |
Carl | +0.021 |
Mine | +0.020 |
Andy | +0.018 |
Kenny | +0.018 |
Ted | +0.018 |
Neg logit contributions | |
adies | -0.021 |
oral | -0.020 |
airport | -0.020 |
moral | -0.018 |
decides | -0.018 |
Token | On |
Token ID | 2202 |
Feature activation | +1.29e+00 |
Pos logit contributions | |
Carl | +0.001 |
whe | +0.001 |
Mine | +0.001 |
Kenny | +0.001 |
baseball | +0.001 |
Neg logit contributions | |
oral | -0.001 |
Him | -0.001 |
moral | -0.001 |
adies | -0.001 |
decides | -0.001 |
Token | a |
Token ID | 257 |
Feature activation | +2.48e+00 |
Pos logit contributions | |
tug | +0.023 |
Carl | +0.022 |
whe | +0.021 |
Mine | +0.020 |
Andy | +0.020 |
Neg logit contributions | |
oral | -0.039 |
adies | -0.037 |
decides | -0.036 |
?'" | -0.036 |
Fore | -0.035 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.34e+00 |
Pos logit contributions | |
tug | +0.054 |
whe | +0.050 |
Carl | +0.048 |
Mine | +0.048 |
skate | +0.046 |
Neg logit contributions | |
oral | -0.066 |
?'" | -0.063 |
adies | -0.063 |
moral | -0.060 |
decides | -0.060 |
Token | day |
Token ID | 1110 |
Feature activation | +1.22e+00 |
Pos logit contributions | |
tug | +0.116 |
whe | +0.112 |
Mine | +0.109 |
Carl | +0.107 |
fought | +0.106 |
Neg logit contributions | |
oral | -0.090 |
?'" | -0.087 |
?" | -0.085 |
moral | -0.084 |
adies | -0.082 |
Token | , |
Token ID | 11 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
tug | +0.031 |
whe | +0.031 |
arrass | +0.030 |
Mine | +0.029 |
fought | +0.029 |
Neg logit contributions | |
oral | -0.026 |
?" | -0.025 |
?'" | -0.025 |
adies | -0.024 |
| -0.024 |
Token | a |
Token ID | 257 |
Feature activation | +1.10e+00 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.026 |
Mine | +0.024 |
Carl | +0.024 |
fought | +0.024 |
Neg logit contributions | |
oral | -0.023 |
?'" | -0.022 |
adies | -0.022 |
?" | -0.022 |
moral | -0.022 |
Token | little |
Token ID | 1310 |
Feature activation | +2.23e-01 |
Pos logit contributions | |
tug | +0.032 |
whe | +0.032 |
Carl | +0.031 |
Mine | +0.030 |
fought | +0.030 |
Neg logit contributions | |
oral | -0.020 |
?'" | -0.019 |
moral | -0.018 |
?" | -0.018 |
Fore | -0.018 |
Token | girl |
Token ID | 2576 |
Feature activation | -8.80e-01 |
Pos logit contributions | |
whe | +0.007 |
tug | +0.007 |
fought | +0.007 |
arrass | +0.007 |
Mine | +0.007 |
Neg logit contributions | |
oral | -0.003 |
?" | -0.003 |
?'" | -0.003 |
moral | -0.003 |
| -0.003 |
Token | day |
Token ID | 1110 |
Feature activation | +1.87e-01 |
Pos logit contributions | |
oral | +0.005 |
?" | +0.005 |
moral | +0.005 |
?'" | +0.005 |
Fore | +0.005 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.008 |
Mine | -0.008 |
fought | -0.008 |
Carl | -0.007 |
Token | , |
Token ID | 11 |
Feature activation | -4.25e-02 |
Pos logit contributions | |
whe | +0.005 |
tug | +0.005 |
ci | +0.005 |
arrass | +0.005 |
Mine | +0.005 |
Neg logit contributions | |
?" | -0.004 |
?'" | -0.004 |
oral | -0.004 |
| -0.004 |
adies | -0.004 |
Token | Tim |
Token ID | 5045 |
Feature activation | -6.25e-01 |
Pos logit contributions | |
?" | +0.001 |
| +0.001 |
?'" | +0.001 |
moral | +0.001 |
God | +0.001 |
Neg logit contributions | |
whe | -0.001 |
ci | -0.001 |
beh | -0.001 |
iled | -0.001 |
iting | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | +2.08e+00 |
Pos logit contributions | |
oral | +0.018 |
Him | +0.016 |
adies | +0.016 |
?'" | +0.015 |
Fore | +0.015 |
Neg logit contributions | |
fought | -0.014 |
tug | -0.014 |
Carl | -0.012 |
iled | -0.012 |
skate | -0.012 |
Token | Mr |
Token ID | 1770 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
whe | +0.065 |
tug | +0.065 |
iled | +0.062 |
fought | +0.060 |
ci | +0.059 |
Neg logit contributions | |
?" | -0.033 |
?'" | -0.033 |
moral | -0.032 |
oral | -0.032 |
invites | -0.031 |
Token | . |
Token ID | 13 |
Feature activation | +4.31e+00 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.025 |
Mine | +0.023 |
arrass | +0.023 |
fought | +0.023 |
Neg logit contributions | |
?'" | -0.022 |
oral | -0.022 |
?" | -0.022 |
| -0.022 |
Him | -0.021 |
Token | Bear |
Token ID | 14732 |
Feature activation | +1.39e+00 |
Pos logit contributions | |
tug | +0.102 |
whe | +0.101 |
fought | +0.093 |
arrass | +0.092 |
Mine | +0.092 |
Neg logit contributions | |
oral | -0.102 |
?'" | -0.101 |
moral | -0.098 |
?" | -0.097 |
invites | -0.096 |
Token | went |
Token ID | 1816 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
whe | +0.045 |
tug | +0.045 |
Mine | +0.043 |
oa | +0.043 |
arrass | +0.043 |
Neg logit contributions | |
decides | -0.020 |
oral | -0.019 |
?" | -0.019 |
invites | -0.019 |
| -0.019 |
Token | to |
Token ID | 284 |
Feature activation | -1.37e-01 |
Pos logit contributions | |
?" | +0.032 |
oral | +0.031 |
invites | +0.031 |
?'" | +0.031 |
adies | +0.030 |
Neg logit contributions | |
tug | -0.036 |
whe | -0.034 |
Mine | -0.033 |
arrass | -0.032 |
Carl | -0.032 |
Token | the |
Token ID | 262 |
Feature activation | -1.47e-02 |
Pos logit contributions | |
oral | +0.004 |
adies | +0.003 |
?'" | +0.003 |
moral | +0.003 |
Fore | +0.003 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.003 |
Carl | -0.003 |
Mine | -0.003 |
fought | -0.003 |
Token | park |
Token ID | 3952 |
Feature activation | -1.43e+00 |
Pos logit contributions | |
oral | +0.000 |
?'" | +0.000 |
moral | +0.000 |
?" | +0.000 |
invites | +0.000 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
fought | -0.000 |
Carl | -0.000 |
iled | -0.000 |
Token | after |
Token ID | 706 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
?'" | +0.011 |
oral | +0.011 |
adies | +0.010 |
moral | +0.010 |
?" | +0.010 |
Neg logit contributions | |
whe | -0.007 |
tug | -0.006 |
Mine | -0.006 |
Carl | -0.006 |
Laser | -0.006 |
Token | ! |
Token ID | 0 |
Feature activation | +8.61e-01 |
Pos logit contributions | |
tug | +0.021 |
whe | +0.020 |
Mine | +0.019 |
Carl | +0.019 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.029 |
?'" | -0.028 |
adies | -0.027 |
moral | -0.027 |
?" | -0.027 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
Carl | +0.023 |
whe | +0.023 |
Andy | +0.022 |
Mine | +0.022 |
Kenny | +0.022 |
Neg logit contributions | |
adies | -0.019 |
oral | -0.017 |
decides | -0.017 |
invites | -0.016 |
airport | -0.016 |
Token | On |
Token ID | 2202 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
whe | +0.003 |
Carl | +0.003 |
Mine | +0.003 |
Kenny | +0.003 |
Andy | +0.003 |
Neg logit contributions | |
oral | -0.002 |
Him | -0.002 |
adies | -0.002 |
moral | -0.002 |
decides | -0.002 |
Token | a |
Token ID | 257 |
Feature activation | +2.46e+00 |
Pos logit contributions | |
tug | +0.022 |
Carl | +0.021 |
Andy | +0.020 |
Kenny | +0.020 |
Mine | +0.019 |
Neg logit contributions | |
oral | -0.037 |
adies | -0.036 |
decides | -0.035 |
?'" | -0.035 |
invites | -0.034 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.30e+00 |
Pos logit contributions | |
tug | +0.059 |
whe | +0.054 |
Carl | +0.052 |
Mine | +0.051 |
skate | +0.051 |
Neg logit contributions | |
oral | -0.062 |
adies | -0.059 |
?'" | -0.058 |
decides | -0.056 |
Him | -0.055 |
Token | day |
Token ID | 1110 |
Feature activation | +1.11e+00 |
Pos logit contributions | |
tug | +0.105 |
whe | +0.101 |
Carl | +0.097 |
Mine | +0.096 |
fought | +0.094 |
Neg logit contributions | |
oral | -0.100 |
?'" | -0.096 |
adies | -0.094 |
moral | -0.092 |
Fore | -0.092 |
Token | , |
Token ID | 11 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
tug | +0.023 |
whe | +0.021 |
Carl | +0.021 |
Mine | +0.020 |
fought | +0.019 |
Neg logit contributions | |
oral | -0.032 |
?'" | -0.030 |
moral | -0.029 |
Fore | -0.029 |
adies | -0.029 |
Token | Billy |
Token ID | 15890 |
Feature activation | +4.38e-01 |
Pos logit contributions | |
tug | +0.023 |
whe | +0.023 |
Mine | +0.022 |
Carl | +0.021 |
fought | +0.021 |
Neg logit contributions | |
oral | -0.025 |
?'" | -0.024 |
adies | -0.023 |
moral | -0.023 |
?" | -0.023 |
Token | was |
Token ID | 373 |
Feature activation | +6.01e-01 |
Pos logit contributions | |
tug | +0.010 |
fought | +0.009 |
iled | +0.009 |
Carl | +0.009 |
whe | +0.009 |
Neg logit contributions | |
oral | -0.012 |
moral | -0.011 |
?'" | -0.011 |
adies | -0.010 |
Fore | -0.010 |
Token | out |
Token ID | 503 |
Feature activation | +1.00e+00 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.010 |
Mine | +0.009 |
Carl | +0.009 |
baseball | +0.009 |
Neg logit contributions | |
oral | -0.020 |
?'" | -0.018 |
adies | -0.018 |
moral | -0.018 |
decides | -0.017 |
Token | named |
Token ID | 3706 |
Feature activation | -3.12e-01 |
Pos logit contributions | |
oral | +0.023 |
?'" | +0.022 |
adies | +0.021 |
moral | +0.021 |
Fore | +0.021 |
Neg logit contributions | |
tug | -0.027 |
whe | -0.025 |
Carl | -0.024 |
Mine | -0.024 |
fought | -0.024 |
Token | Amelia |
Token ID | 49769 |
Feature activation | -1.66e-01 |
Pos logit contributions | |
oral | +0.012 |
narrative | +0.010 |
Fore | +0.010 |
?'" | +0.010 |
moral | +0.010 |
Neg logit contributions | |
Tim | -0.005 |
Carl | -0.004 |
Andy | -0.004 |
Liz | -0.004 |
tug | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | +9.45e-01 |
Pos logit contributions | |
oral | +0.006 |
adies | +0.006 |
meantime | +0.006 |
?'" | +0.006 |
narrative | +0.006 |
Neg logit contributions | |
tug | -0.002 |
iled | -0.002 |
fought | -0.002 |
e | -0.001 |
ci | -0.001 |
Token | On |
Token ID | 1550 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
whe | +0.020 |
Rob | +0.020 |
Andy | +0.020 |
tug | +0.020 |
Mix | +0.020 |
Neg logit contributions | |
oral | -0.023 |
?'" | -0.021 |
moral | -0.020 |
adies | -0.020 |
Fore | -0.020 |
Token | a |
Token ID | 257 |
Feature activation | +1.45e+00 |
Pos logit contributions | |
tug | +0.022 |
whe | +0.022 |
Carl | +0.021 |
Mine | +0.021 |
Andy | +0.020 |
Neg logit contributions | |
oral | -0.025 |
?'" | -0.024 |
adies | -0.024 |
decides | -0.024 |
invites | -0.023 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.26e+00 |
Pos logit contributions | |
tug | +0.035 |
whe | +0.032 |
Carl | +0.031 |
Mine | +0.031 |
skate | +0.030 |
Neg logit contributions | |
oral | -0.036 |
adies | -0.034 |
?'" | -0.034 |
decides | -0.033 |
moral | -0.033 |
Token | day |
Token ID | 1110 |
Feature activation | +7.90e-01 |
Pos logit contributions | |
tug | +0.095 |
whe | +0.092 |
Carl | +0.088 |
ci | +0.085 |
Mine | +0.085 |
Neg logit contributions | |
oral | -0.109 |
?'" | -0.104 |
adies | -0.103 |
decides | -0.101 |
Fore | -0.100 |
Token | , |
Token ID | 11 |
Feature activation | +4.45e-01 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.019 |
Mine | +0.018 |
arrass | +0.018 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.018 |
?'" | -0.017 |
?" | -0.017 |
adies | -0.017 |
Fore | -0.016 |
Token | her |
Token ID | 607 |
Feature activation | -6.66e-02 |
Pos logit contributions | |
Andy | +0.009 |
tug | +0.009 |
Carl | +0.009 |
Kenny | +0.009 |
Bobby | +0.008 |
Neg logit contributions | |
oral | -0.013 |
decides | -0.012 |
?'" | -0.011 |
moral | -0.011 |
meets | -0.011 |
Token | mom |
Token ID | 1995 |
Feature activation | +6.32e-02 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
moral | +0.001 |
?" | +0.001 |
Fore | +0.001 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Carl | -0.002 |
Mine | -0.002 |
fought | -0.002 |
Token | took |
Token ID | 1718 |
Feature activation | -1.07e-01 |
Pos logit contributions | |
fought | +0.001 |
Tim | +0.001 |
Johnny | +0.001 |
ci | +0.001 |
Andy | +0.001 |
Neg logit contributions | |
oral | -0.002 |
adies | -0.002 |
narrative | -0.002 |
embassy | -0.002 |
Fore | -0.002 |
Token | . |
Token ID | 13 |
Feature activation | -1.63e-01 |
Pos logit contributions | |
tug | +0.005 |
oa | +0.005 |
arrass | +0.005 |
ked | +0.005 |
Carl | +0.005 |
Neg logit contributions | |
?" | -0.003 |
?'" | -0.003 |
moral | -0.003 |
oral | -0.003 |
airport | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | +4.46e-02 |
Pos logit contributions | |
oral | +0.003 |
adies | +0.003 |
?'" | +0.003 |
moral | +0.003 |
?" | +0.003 |
Neg logit contributions | |
whe | -0.004 |
tug | -0.004 |
Mine | -0.004 |
Carl | -0.004 |
Kenny | -0.004 |
Token |
|
Token ID | 198 |
Feature activation | +6.29e-02 |
Pos logit contributions | |
whe | +0.001 |
tug | +0.001 |
Kenny | +0.001 |
Carl | +0.001 |
skate | +0.001 |
Neg logit contributions | |
adies | -0.001 |
decides | -0.001 |
Him | -0.001 |
?'" | -0.001 |
oral | -0.001 |
Token | After |
Token ID | 3260 |
Feature activation | +1.00e+00 |
Pos logit contributions | |
tug | +0.002 |
Carl | +0.002 |
whe | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.001 |
moral | -0.001 |
?'" | -0.001 |
Fore | -0.001 |
?" | -0.001 |
Token | Wh |
Token ID | 854 |
Feature activation | +1.80e+00 |
Pos logit contributions | |
arrass | +0.031 |
ci | +0.030 |
imm | +0.029 |
ike | +0.029 |
urt | +0.028 |
Neg logit contributions | |
| -0.018 |
God | -0.018 |
moral | -0.018 |
oral | -0.017 |
airport | -0.017 |
Token | isk |
Token ID | 1984 |
Feature activation | +4.18e+00 |
Pos logit contributions | |
tug | +0.036 |
whe | +0.035 |
Mine | +0.033 |
fought | +0.032 |
Carl | +0.032 |
Neg logit contributions | |
?'" | -0.048 |
oral | -0.048 |
?" | -0.046 |
adies | -0.046 |
invites | -0.046 |
Token | ers |
Token ID | 364 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
tug | +0.114 |
whe | +0.110 |
Carl | +0.106 |
Mine | +0.106 |
fought | +0.103 |
Neg logit contributions | |
oral | -0.085 |
?'" | -0.081 |
adies | -0.079 |
moral | -0.078 |
Fore | -0.077 |
Token | got |
Token ID | 1392 |
Feature activation | +7.24e-01 |
Pos logit contributions | |
tug | +0.035 |
whe | +0.035 |
Mine | +0.033 |
Carl | +0.033 |
fought | +0.032 |
Neg logit contributions | |
oral | -0.032 |
?'" | -0.030 |
moral | -0.029 |
adies | -0.029 |
?" | -0.029 |
Token | better |
Token ID | 1365 |
Feature activation | +8.69e-01 |
Pos logit contributions | |
tug | +0.016 |
whe | +0.015 |
Carl | +0.014 |
Mine | +0.014 |
fought | +0.014 |
Neg logit contributions | |
oral | -0.019 |
?'" | -0.018 |
adies | -0.018 |
moral | -0.018 |
Fore | -0.017 |
Token | , |
Token ID | 11 |
Feature activation | +4.08e-02 |
Pos logit contributions | |
whe | +0.026 |
Carl | +0.025 |
Mine | +0.025 |
Laser | +0.024 |
arrass | +0.024 |
Neg logit contributions | |
?" | -0.015 |
invites | -0.015 |
oral | -0.015 |
| -0.015 |
Him | -0.015 |
Token | he |
Token ID | 339 |
Feature activation | -1.86e-01 |
Pos logit contributions | |
tug | +0.001 |
iled | +0.001 |
arrass | +0.001 |
whe | +0.001 |
urt | +0.001 |
Neg logit contributions | |
moral | -0.001 |
oral | -0.001 |
Fore | -0.001 |
| -0.001 |
airport | -0.001 |
Token | day |
Token ID | 1110 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
oral | +0.003 |
moral | +0.003 |
?'" | +0.003 |
?" | +0.003 |
adies | +0.002 |
Neg logit contributions | |
tug | -0.004 |
whe | -0.004 |
Mine | -0.004 |
Carl | -0.004 |
fought | -0.004 |
Token | , |
Token ID | 11 |
Feature activation | +1.40e-01 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
moral | -0.003 |
adies | -0.003 |
Fore | -0.003 |
Token | Lucy |
Token ID | 22162 |
Feature activation | -2.39e-01 |
Pos logit contributions | |
tug | +0.003 |
whe | +0.003 |
Mine | +0.003 |
Carl | +0.003 |
fought | +0.003 |
Neg logit contributions | |
oral | -0.004 |
?'" | -0.004 |
?" | -0.004 |
adies | -0.003 |
moral | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | +1.86e+00 |
Pos logit contributions | |
oral | +0.006 |
?'" | +0.006 |
adies | +0.006 |
Fore | +0.006 |
moral | +0.006 |
Neg logit contributions | |
tug | -0.006 |
fought | -0.005 |
iled | -0.005 |
Carl | -0.005 |
Mine | -0.005 |
Token | Mr |
Token ID | 1770 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
tug | +0.054 |
whe | +0.053 |
Carl | +0.050 |
Mine | +0.050 |
fought | +0.049 |
Neg logit contributions | |
oral | -0.034 |
?'" | -0.033 |
?" | -0.032 |
moral | -0.032 |
invites | -0.031 |
Token | . |
Token ID | 13 |
Feature activation | +4.16e+00 |
Pos logit contributions | |
tug | +0.029 |
whe | +0.028 |
Mine | +0.027 |
Carl | +0.026 |
fought | +0.026 |
Neg logit contributions | |
oral | -0.030 |
?'" | -0.030 |
?" | -0.029 |
moral | -0.028 |
adies | -0.028 |
Token | Bear |
Token ID | 14732 |
Feature activation | +1.50e+00 |
Pos logit contributions | |
tug | +0.092 |
whe | +0.089 |
Mine | +0.083 |
fought | +0.082 |
Carl | +0.082 |
Neg logit contributions | |
oral | -0.107 |
?'" | -0.103 |
moral | -0.100 |
?" | -0.099 |
adies | -0.099 |
Token | went |
Token ID | 1816 |
Feature activation | -1.49e+00 |
Pos logit contributions | |
whe | +0.044 |
tug | +0.044 |
Mine | +0.043 |
Carl | +0.041 |
arrass | +0.041 |
Neg logit contributions | |
oral | -0.026 |
decides | -0.025 |
invites | -0.025 |
?" | -0.024 |
moral | -0.024 |
Token | to |
Token ID | 284 |
Feature activation | -1.79e-01 |
Pos logit contributions | |
oral | +0.036 |
?'" | +0.035 |
?" | +0.035 |
adies | +0.035 |
invites | +0.034 |
Neg logit contributions | |
tug | -0.035 |
whe | -0.033 |
Mine | -0.032 |
Carl | -0.031 |
arrass | -0.031 |
Token | the |
Token ID | 262 |
Feature activation | +7.26e-02 |
Pos logit contributions | |
oral | +0.005 |
adies | +0.005 |
Fore | +0.005 |
?'" | +0.005 |
moral | +0.005 |
Neg logit contributions | |
tug | -0.004 |
Carl | -0.003 |
whe | -0.003 |
Mine | -0.003 |
Andy | -0.003 |
Token | park |
Token ID | 3952 |
Feature activation | -1.50e+00 |
Pos logit contributions | |
tug | +0.003 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
moral | -0.001 |
?" | -0.001 |
adies | -0.001 |
Token | Jen |
Token ID | 44875 |
Feature activation | +6.55e-01 |
Pos logit contributions | |
oral | +0.010 |
?'" | +0.010 |
adies | +0.009 |
invites | +0.009 |
?" | +0.009 |
Neg logit contributions | |
tug | -0.012 |
whe | -0.012 |
Carl | -0.011 |
Mine | -0.011 |
fought | -0.011 |
Token | and |
Token ID | 290 |
Feature activation | +2.64e+00 |
Pos logit contributions | |
tug | +0.016 |
whe | +0.015 |
fought | +0.014 |
Carl | +0.014 |
Mine | +0.014 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.015 |
adies | -0.015 |
Fore | -0.015 |
moral | -0.015 |
Token | Sam |
Token ID | 3409 |
Feature activation | +2.37e+00 |
Pos logit contributions | |
tug | +0.059 |
whe | +0.059 |
Mine | +0.054 |
Carl | +0.054 |
iled | +0.053 |
Neg logit contributions | |
oral | -0.065 |
?'" | -0.063 |
moral | -0.062 |
adies | -0.061 |
?" | -0.061 |
Token | and |
Token ID | 290 |
Feature activation | +2.91e+00 |
Pos logit contributions | |
whe | +0.063 |
Carl | +0.062 |
Mine | +0.062 |
tug | +0.062 |
urt | +0.060 |
Neg logit contributions | |
oral | -0.046 |
?'" | -0.045 |
decides | -0.044 |
adies | -0.044 |
?" | -0.044 |
Token | Mr |
Token ID | 1770 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
whe | +0.049 |
tug | +0.049 |
Mine | +0.043 |
iled | +0.043 |
ci | +0.042 |
Neg logit contributions | |
oral | -0.086 |
?'" | -0.085 |
moral | -0.084 |
invites | -0.084 |
adies | -0.083 |
Token | . |
Token ID | 13 |
Feature activation | +4.15e+00 |
Pos logit contributions | |
tug | +0.039 |
whe | +0.038 |
drib | +0.036 |
Carl | +0.035 |
Kenny | +0.035 |
Neg logit contributions | |
oral | -0.028 |
| -0.027 |
Him | -0.027 |
adies | -0.027 |
?'" | -0.027 |
Token | Lee |
Token ID | 5741 |
Feature activation | +1.72e+00 |
Pos logit contributions | |
whe | +0.119 |
tug | +0.112 |
ci | +0.105 |
Carl | +0.105 |
drib | +0.103 |
Neg logit contributions | |
oral | -0.086 |
| -0.084 |
invites | -0.081 |
moral | -0.079 |
decides | -0.079 |
Token | were |
Token ID | 547 |
Feature activation | -2.89e-01 |
Pos logit contributions | |
Carl | +0.047 |
Mine | +0.047 |
whe | +0.046 |
urt | +0.045 |
arrass | +0.045 |
Neg logit contributions | |
invites | -0.032 |
oral | -0.032 |
decides | -0.031 |
?" | -0.031 |
moral | -0.030 |
Token | covered |
Token ID | 5017 |
Feature activation | +1.35e+00 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.007 |
?" | +0.007 |
moral | +0.006 |
adies | +0.006 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.006 |
Mine | -0.006 |
arrass | -0.006 |
Token | in |
Token ID | 287 |
Feature activation | -1.03e-01 |
Pos logit contributions | |
tug | +0.032 |
whe | +0.032 |
Mine | +0.030 |
arrass | +0.030 |
fought | +0.029 |
Neg logit contributions | |
?" | -0.031 |
?'" | -0.030 |
oral | -0.030 |
moral | -0.030 |
adies | -0.029 |
Token | sticky |
Token ID | 23408 |
Feature activation | +3.51e-01 |
Pos logit contributions | |
moral | +0.002 |
oral | +0.002 |
?" | +0.002 |
?'" | +0.002 |
invites | +0.002 |
Neg logit contributions | |
whe | -0.003 |
Carl | -0.003 |
arrass | -0.003 |
seek | -0.003 |
Laser | -0.003 |
Token | lion |
Token ID | 18744 |
Feature activation | +1.26e+00 |
Pos logit contributions | |
oral | +0.045 |
?'" | +0.043 |
adies | +0.042 |
Fore | +0.042 |
moral | +0.041 |
Neg logit contributions | |
tug | -0.035 |
whe | -0.034 |
Carl | -0.032 |
Mine | -0.032 |
fought | -0.031 |
Token | who |
Token ID | 508 |
Feature activation | +1.91e-01 |
Pos logit contributions | |
tug | +0.028 |
whe | +0.027 |
Carl | +0.026 |
Mine | +0.025 |
fought | +0.025 |
Neg logit contributions | |
oral | -0.033 |
?'" | -0.031 |
adies | -0.030 |
moral | -0.030 |
Fore | -0.030 |
Token | lived |
Token ID | 5615 |
Feature activation | +3.95e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.005 |
Carl | +0.005 |
Mine | +0.004 |
fought | +0.004 |
Neg logit contributions | |
oral | -0.004 |
?'" | -0.004 |
adies | -0.004 |
moral | -0.004 |
Fore | -0.004 |
Token | in |
Token ID | 287 |
Feature activation | +6.54e-01 |
Pos logit contributions | |
tug | +0.006 |
Carl | +0.006 |
whe | +0.006 |
fought | +0.006 |
Andy | +0.006 |
Neg logit contributions | |
oral | -0.012 |
?'" | -0.012 |
decides | -0.012 |
invites | -0.011 |
adies | -0.011 |
Token | a |
Token ID | 257 |
Feature activation | +1.14e+00 |
Pos logit contributions | |
tug | +0.017 |
whe | +0.017 |
Mine | +0.016 |
urt | +0.015 |
arrass | +0.015 |
Neg logit contributions | |
oral | -0.014 |
?'" | -0.013 |
?" | -0.013 |
moral | -0.013 |
invites | -0.013 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.15e+00 |
Pos logit contributions | |
tug | +0.028 |
Carl | +0.025 |
Mine | +0.024 |
baseball | +0.024 |
skate | +0.024 |
Neg logit contributions | |
oral | -0.028 |
adies | -0.027 |
?'" | -0.026 |
Him | -0.026 |
decides | -0.024 |
Token | forest |
Token ID | 8222 |
Feature activation | -6.95e-02 |
Pos logit contributions | |
tug | +0.106 |
ci | +0.102 |
Carl | +0.101 |
whe | +0.100 |
baseball | +0.100 |
Neg logit contributions | |
oral | -0.092 |
adies | -0.086 |
Fore | -0.084 |
Him | -0.083 |
decides | -0.082 |
Token | . |
Token ID | 13 |
Feature activation | +5.46e-02 |
Pos logit contributions | |
oral | +0.002 |
?'" | +0.002 |
adies | +0.002 |
moral | +0.002 |
Fore | +0.002 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Token | He |
Token ID | 679 |
Feature activation | +3.07e-01 |
Pos logit contributions | |
Carl | +0.001 |
Andy | +0.001 |
Mine | +0.001 |
whe | +0.001 |
tug | +0.001 |
Neg logit contributions | |
oral | -0.001 |
Fore | -0.001 |
adies | -0.001 |
?'" | -0.001 |
moral | -0.001 |
Token | was |
Token ID | 373 |
Feature activation | +1.09e-02 |
Pos logit contributions | |
tug | +0.007 |
fought | +0.007 |
Carl | +0.006 |
whe | +0.006 |
skate | +0.006 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
adies | -0.008 |
Fore | -0.007 |
Him | -0.007 |
Token | very |
Token ID | 845 |
Feature activation | +7.29e-01 |
Pos logit contributions | |
tug | +0.000 |
whe | +0.000 |
Carl | +0.000 |
Mine | +0.000 |
fought | +0.000 |
Neg logit contributions | |
oral | -0.000 |
?'" | -0.000 |
adies | -0.000 |
moral | -0.000 |
Fore | -0.000 |
Token |
|
Token ID | 198 |
Feature activation | +3.46e-01 |
Pos logit contributions | |
whe | +0.007 |
Carl | +0.006 |
tug | +0.006 |
Kenny | +0.006 |
Andy | +0.006 |
Neg logit contributions | |
adies | -0.007 |
decides | -0.006 |
airport | -0.006 |
Him | -0.006 |
invites | -0.006 |
Token | The |
Token ID | 464 |
Feature activation | -6.71e-01 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.009 |
Mine | +0.008 |
Carl | +0.008 |
iled | +0.008 |
Neg logit contributions | |
adies | -0.007 |
oral | -0.007 |
?'" | -0.007 |
?" | -0.007 |
invites | -0.007 |
Token | cub |
Token ID | 13617 |
Feature activation | +7.19e-01 |
Pos logit contributions | |
oral | +0.018 |
moral | +0.017 |
?'" | +0.017 |
?" | +0.016 |
Fore | +0.016 |
Neg logit contributions | |
whe | -0.014 |
tug | -0.014 |
Mine | -0.013 |
Carl | -0.013 |
fought | -0.013 |
Token | and |
Token ID | 290 |
Feature activation | +9.71e-01 |
Pos logit contributions | |
whe | +0.019 |
Mine | +0.018 |
tug | +0.018 |
arrass | +0.018 |
oa | +0.018 |
Neg logit contributions | |
oral | -0.015 |
moral | -0.014 |
invites | -0.013 |
decides | -0.013 |
?" | -0.013 |
Token | Mrs |
Token ID | 9074 |
Feature activation | +2.48e+00 |
Pos logit contributions | |
whe | +0.028 |
tug | +0.027 |
iled | +0.025 |
Mine | +0.025 |
fought | +0.025 |
Neg logit contributions | |
?'" | -0.017 |
oral | -0.017 |
moral | -0.017 |
invites | -0.017 |
Him | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | +4.13e+00 |
Pos logit contributions | |
tug | +0.066 |
whe | +0.062 |
fought | +0.058 |
Mine | +0.058 |
skate | +0.057 |
Neg logit contributions | |
oral | -0.054 |
Him | -0.053 |
?'" | -0.053 |
| -0.051 |
adies | -0.050 |
Token | Jones |
Token ID | 5437 |
Feature activation | +1.81e+00 |
Pos logit contributions | |
tug | +0.101 |
whe | +0.098 |
fought | +0.093 |
Mine | +0.092 |
Carl | +0.091 |
Neg logit contributions | |
oral | -0.097 |
?'" | -0.094 |
moral | -0.090 |
adies | -0.088 |
invites | -0.088 |
Token | spent |
Token ID | 3377 |
Feature activation | +7.28e-01 |
Pos logit contributions | |
whe | +0.046 |
Mine | +0.046 |
arrass | +0.045 |
oa | +0.045 |
urt | +0.043 |
Neg logit contributions | |
oral | -0.037 |
decides | -0.037 |
moral | -0.037 |
invites | -0.036 |
?'" | -0.035 |
Token | the |
Token ID | 262 |
Feature activation | +7.49e-01 |
Pos logit contributions | |
whe | +0.020 |
tug | +0.019 |
Mine | +0.019 |
urt | +0.019 |
arrass | +0.018 |
Neg logit contributions | |
| -0.013 |
Fore | -0.013 |
moral | -0.013 |
oral | -0.013 |
?" | -0.013 |
Token | afternoon |
Token ID | 6672 |
Feature activation | +2.24e-02 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.019 |
Mine | +0.019 |
Carl | +0.019 |
fought | +0.019 |
Neg logit contributions | |
oral | -0.015 |
moral | -0.015 |
Fore | -0.014 |
invites | -0.014 |
?'" | -0.013 |
Token | swimming |
Token ID | 14899 |
Feature activation | +5.00e-01 |
Pos logit contributions | |
arrass | +0.001 |
urt | +0.000 |
oa | +0.000 |
cream | +0.000 |
Mine | +0.000 |
Neg logit contributions | |
invites | -0.001 |
meets | -0.000 |
| -0.000 |
?" | -0.000 |
moral | -0.000 |
Token | , |
Token ID | 11 |
Feature activation | +6.11e-01 |
Pos logit contributions | |
tug | +0.024 |
whe | +0.023 |
Carl | +0.023 |
Mine | +0.022 |
fought | +0.021 |
Neg logit contributions | |
oral | -0.032 |
?'" | -0.031 |
adies | -0.031 |
moral | -0.030 |
Fore | -0.030 |
Token | Fl |
Token ID | 1610 |
Feature activation | +8.83e-02 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.013 |
Carl | +0.012 |
Mine | +0.012 |
fought | +0.012 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.015 |
adies | -0.015 |
moral | -0.015 |
Fore | -0.014 |
Token | opsy |
Token ID | 44522 |
Feature activation | -3.25e-01 |
Pos logit contributions | |
tug | +0.002 |
iled | +0.002 |
Carl | +0.002 |
urt | +0.002 |
Mine | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
adies | -0.002 |
invites | -0.002 |
moral | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +1.91e+00 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.007 |
adies | +0.007 |
Fore | +0.007 |
moral | +0.007 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.008 |
Carl | -0.008 |
fought | -0.007 |
Mine | -0.007 |
Token | Mr |
Token ID | 1770 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
tug | +0.047 |
whe | +0.046 |
Mine | +0.042 |
Carl | +0.042 |
fought | +0.042 |
Neg logit contributions | |
oral | -0.044 |
?'" | -0.042 |
moral | -0.042 |
invites | -0.041 |
?" | -0.041 |
Token | . |
Token ID | 13 |
Feature activation | +4.11e+00 |
Pos logit contributions | |
tug | +0.025 |
whe | +0.024 |
Mine | +0.023 |
Carl | +0.023 |
fought | +0.022 |
Neg logit contributions | |
oral | -0.025 |
?'" | -0.024 |
Fore | -0.023 |
moral | -0.023 |
?" | -0.023 |
Token | Snow |
Token ID | 7967 |
Feature activation | +1.75e+00 |
Pos logit contributions | |
tug | +0.089 |
whe | +0.086 |
fought | +0.081 |
ci | +0.077 |
Mine | +0.077 |
Neg logit contributions | |
oral | -0.109 |
?'" | -0.104 |
| -0.103 |
moral | -0.102 |
Fore | -0.101 |
Token | man |
Token ID | 805 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
tug | +0.041 |
whe | +0.037 |
Mine | +0.037 |
Carl | +0.035 |
arrass | +0.035 |
Neg logit contributions | |
oral | -0.044 |
?'" | -0.042 |
moral | -0.041 |
Fore | -0.041 |
?" | -0.041 |
Token | were |
Token ID | 547 |
Feature activation | +2.75e-02 |
Pos logit contributions | |
tug | +0.044 |
arrass | +0.042 |
whe | +0.042 |
Mine | +0.042 |
oa | +0.040 |
Neg logit contributions | |
oral | -0.034 |
moral | -0.032 |
invites | -0.032 |
?'" | -0.032 |
decides | -0.031 |
Token | the |
Token ID | 262 |
Feature activation | -3.34e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Mine | +0.001 |
Carl | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
moral | -0.001 |
adies | -0.001 |
Fore | -0.001 |
Token | best |
Token ID | 1266 |
Feature activation | +6.94e-01 |
Pos logit contributions | |
oral | +0.006 |
?'" | +0.006 |
moral | +0.005 |
Fore | +0.005 |
adies | +0.005 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.010 |
Carl | -0.010 |
Mine | -0.009 |
fought | -0.009 |
Token | on |
Token ID | 319 |
Feature activation | +8.06e-01 |
Pos logit contributions | |
tug | +0.021 |
whe | +0.020 |
Carl | +0.020 |
Mine | +0.019 |
fought | +0.019 |
Neg logit contributions | |
oral | -0.023 |
?'" | -0.022 |
adies | -0.021 |
?" | -0.021 |
moral | -0.021 |
Token | , |
Token ID | 11 |
Feature activation | +5.59e-01 |
Pos logit contributions | |
tug | +0.017 |
whe | +0.016 |
Carl | +0.016 |
Mine | +0.015 |
fought | +0.015 |
Neg logit contributions | |
oral | -0.022 |
?'" | -0.021 |
adies | -0.021 |
Fore | -0.020 |
moral | -0.020 |
Token | Lily |
Token ID | 20037 |
Feature activation | -5.56e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.013 |
Carl | +0.013 |
Mine | +0.012 |
fought | +0.012 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.013 |
adies | -0.013 |
Fore | -0.012 |
moral | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | +2.03e+00 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.013 |
adies | +0.013 |
Fore | +0.012 |
moral | +0.012 |
Neg logit contributions | |
tug | -0.013 |
whe | -0.013 |
Carl | -0.012 |
Mine | -0.012 |
fought | -0.012 |
Token | Mrs |
Token ID | 9074 |
Feature activation | +2.55e+00 |
Pos logit contributions | |
tug | +0.058 |
whe | +0.057 |
Carl | +0.054 |
Mine | +0.054 |
iled | +0.053 |
Neg logit contributions | |
oral | -0.038 |
?'" | -0.037 |
moral | -0.035 |
?" | -0.035 |
invites | -0.035 |
Token | . |
Token ID | 13 |
Feature activation | +4.09e+00 |
Pos logit contributions | |
tug | +0.065 |
whe | +0.060 |
Carl | +0.057 |
Mine | +0.057 |
skate | +0.056 |
Neg logit contributions | |
oral | -0.059 |
?'" | -0.057 |
?" | -0.055 |
moral | -0.055 |
| -0.055 |
Token | Johnson |
Token ID | 5030 |
Feature activation | +1.75e+00 |
Pos logit contributions | |
tug | +0.101 |
ci | +0.093 |
whe | +0.090 |
fought | +0.086 |
Carl | +0.085 |
Neg logit contributions | |
oral | -0.097 |
| -0.096 |
?'" | -0.094 |
moral | -0.088 |
Fore | -0.087 |
Token | exercised |
Token ID | 25805 |
Feature activation | +8.30e-01 |
Pos logit contributions | |
Mine | +0.056 |
tug | +0.055 |
arrass | +0.054 |
whe | +0.054 |
Kenny | +0.053 |
Neg logit contributions | |
decides | -0.024 |
invites | -0.024 |
oral | -0.024 |
moral | -0.023 |
?'" | -0.022 |
Token | together |
Token ID | 1978 |
Feature activation | +9.19e-01 |
Pos logit contributions | |
tug | +0.017 |
whe | +0.017 |
Carl | +0.016 |
Mine | +0.016 |
fought | +0.015 |
Neg logit contributions | |
oral | -0.022 |
?'" | -0.022 |
adies | -0.021 |
moral | -0.021 |
?" | -0.021 |
Token | every |
Token ID | 790 |
Feature activation | +1.82e+00 |
Pos logit contributions | |
whe | +0.024 |
tug | +0.024 |
Mine | +0.024 |
arrass | +0.023 |
Kenny | +0.023 |
Neg logit contributions | |
oral | -0.018 |
?'" | -0.018 |
moral | -0.017 |
?" | -0.017 |
invites | -0.017 |
Token | morning |
Token ID | 3329 |
Feature activation | -1.68e+00 |
Pos logit contributions | |
tug | +0.044 |
Carl | +0.039 |
whe | +0.039 |
fought | +0.037 |
Mine | +0.037 |
Neg logit contributions | |
oral | -0.044 |
adies | -0.043 |
?'" | -0.043 |
Prin | -0.041 |
Fore | -0.041 |
Token |
|
Token ID | 198 |
Feature activation | +1.49e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
adies | -0.001 |
?'" | -0.001 |
moral | -0.001 |
Fore | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +2.76e-02 |
Pos logit contributions | |
Carl | +0.004 |
whe | +0.003 |
Andy | +0.003 |
Tim | +0.003 |
tug | +0.003 |
Neg logit contributions | |
airport | -0.004 |
adies | -0.004 |
decides | -0.004 |
embassy | -0.004 |
bride | -0.004 |
Token | Tim |
Token ID | 14967 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Mine | +0.001 |
ci | +0.001 |
Carl | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
moral | -0.001 |
Fore | -0.001 |
?" | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | +1.97e+00 |
Pos logit contributions | |
oral | +0.003 |
?'" | +0.003 |
decides | +0.003 |
?" | +0.003 |
moral | +0.003 |
Neg logit contributions | |
whe | -0.004 |
tug | -0.004 |
Mine | -0.004 |
Carl | -0.004 |
arrass | -0.004 |
Token | Mr |
Token ID | 1770 |
Feature activation | +1.11e+00 |
Pos logit contributions | |
tug | +0.044 |
whe | +0.043 |
Mine | +0.041 |
iled | +0.040 |
urt | +0.038 |
Neg logit contributions | |
oral | -0.048 |
?'" | -0.048 |
moral | -0.047 |
?" | -0.046 |
invites | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | +4.09e+00 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.025 |
Mine | +0.023 |
Carl | +0.023 |
fought | +0.023 |
Neg logit contributions | |
oral | -0.027 |
?'" | -0.026 |
?" | -0.026 |
moral | -0.025 |
adies | -0.025 |
Token | Bear |
Token ID | 14732 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
tug | +0.101 |
whe | +0.100 |
fought | +0.091 |
Mine | +0.091 |
arrass | +0.089 |
Neg logit contributions | |
oral | -0.095 |
?'" | -0.092 |
moral | -0.091 |
| -0.089 |
?" | -0.088 |
Token | went |
Token ID | 1816 |
Feature activation | -1.71e+00 |
Pos logit contributions | |
disl | +0.051 |
igsaw | +0.050 |
whe | +0.050 |
Mine | +0.049 |
MY | +0.048 |
Neg logit contributions | |
decides | -0.028 |
moral | -0.026 |
oral | -0.025 |
met | -0.024 |
would | -0.023 |
Token | to |
Token ID | 284 |
Feature activation | -4.88e-01 |
Pos logit contributions | |
oral | +0.035 |
moral | +0.035 |
?" | +0.035 |
invites | +0.035 |
| +0.034 |
Neg logit contributions | |
tug | -0.043 |
Mine | -0.042 |
whe | -0.041 |
urt | -0.040 |
arrass | -0.040 |
Token | Sue |
Token ID | 26089 |
Feature activation | +4.92e-01 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.012 |
adies | +0.012 |
moral | +0.012 |
Fore | +0.012 |
Neg logit contributions | |
tug | -0.011 |
whe | -0.010 |
Carl | -0.010 |
Mine | -0.010 |
fought | -0.009 |
Token | and |
Token ID | 290 |
Feature activation | +1.70e+00 |
Pos logit contributions | |
whe | +0.015 |
tug | +0.015 |
Yeah | +0.015 |
arrass | +0.015 |
oa | +0.015 |
Neg logit contributions | |
?" | -0.007 |
?'" | -0.007 |
invites | -0.007 |
| -0.007 |
oral | -0.007 |
Token | happy |
Token ID | 3772 |
Feature activation | +6.04e-01 |
Pos logit contributions | |
oral | +0.020 |
?'" | +0.019 |
adies | +0.019 |
moral | +0.019 |
?" | +0.018 |
Neg logit contributions | |
tug | -0.015 |
whe | -0.014 |
Carl | -0.013 |
Mine | -0.013 |
fought | -0.013 |
Token | . |
Token ID | 13 |
Feature activation | +7.04e-01 |
Pos logit contributions | |
tug | +0.018 |
whe | +0.017 |
Mine | +0.017 |
Laser | +0.016 |
arrass | +0.016 |
Neg logit contributions | |
?" | -0.011 |
?'" | -0.010 |
invites | -0.010 |
moral | -0.010 |
oral | -0.010 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -4.57e-02 |
Pos logit contributions | |
Carl | +0.016 |
Andy | +0.016 |
Mine | +0.016 |
Kenny | +0.015 |
Tim | +0.015 |
Neg logit contributions | |
adies | -0.018 |
oral | -0.018 |
?'" | -0.016 |
moral | -0.016 |
airport | -0.016 |
Token | On |
Token ID | 2202 |
Feature activation | +1.23e+00 |
Pos logit contributions | |
oral | +0.001 |
Him | +0.001 |
adies | +0.001 |
moral | +0.001 |
decides | +0.001 |
Neg logit contributions | |
Carl | -0.001 |
whe | -0.001 |
Mine | -0.001 |
Kenny | -0.001 |
Andy | -0.001 |
Token | a |
Token ID | 257 |
Feature activation | +2.29e+00 |
Pos logit contributions | |
tug | +0.023 |
Carl | +0.022 |
Mine | +0.020 |
whe | +0.020 |
Andy | +0.020 |
Neg logit contributions | |
oral | -0.036 |
adies | -0.035 |
decides | -0.034 |
?'" | -0.034 |
invites | -0.033 |
Token | sunny |
Token ID | 27737 |
Feature activation | +4.08e+00 |
Pos logit contributions | |
tug | +0.056 |
whe | +0.052 |
Carl | +0.051 |
Mine | +0.050 |
skate | +0.049 |
Neg logit contributions | |
oral | -0.055 |
?'" | -0.052 |
adies | -0.052 |
moral | -0.050 |
decides | -0.050 |
Token | day |
Token ID | 1110 |
Feature activation | +1.26e+00 |
Pos logit contributions | |
tug | +0.107 |
whe | +0.103 |
Mine | +0.100 |
Carl | +0.099 |
fought | +0.097 |
Neg logit contributions | |
oral | -0.087 |
?'" | -0.083 |
?" | -0.081 |
moral | -0.080 |
adies | -0.080 |
Token | , |
Token ID | 11 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
tug | +0.030 |
whe | +0.030 |
Mine | +0.028 |
arrass | +0.028 |
fought | +0.028 |
Neg logit contributions | |
oral | -0.029 |
?'" | -0.028 |
?" | -0.028 |
adies | -0.027 |
moral | -0.027 |
Token | a |
Token ID | 257 |
Feature activation | +9.27e-01 |
Pos logit contributions | |
whe | +0.025 |
tug | +0.024 |
Mine | +0.023 |
urt | +0.023 |
ci | +0.023 |
Neg logit contributions | |
oral | -0.022 |
adies | -0.022 |
?" | -0.022 |
?'" | -0.022 |
| -0.021 |
Token | little |
Token ID | 1310 |
Feature activation | +5.74e-02 |
Pos logit contributions | |
tug | +0.027 |
whe | +0.027 |
Carl | +0.026 |
Mine | +0.026 |
fought | +0.025 |
Neg logit contributions | |
oral | -0.017 |
?'" | -0.016 |
adies | -0.015 |
moral | -0.015 |
?" | -0.015 |
Token | girl |
Token ID | 2576 |
Feature activation | -9.12e-01 |
Pos logit contributions | |
whe | +0.002 |
tug | +0.002 |
fought | +0.002 |
Mine | +0.002 |
Carl | +0.002 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
?" | -0.001 |
moral | -0.001 |
Fore | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +8.21e-02 |
Pos logit contributions | |
Carl | +0.001 |
whe | +0.001 |
tug | +0.001 |
Kenny | +0.001 |
Andy | +0.001 |
Neg logit contributions | |
adies | -0.001 |
decides | -0.001 |
?'" | -0.001 |
oral | -0.001 |
Him | -0.001 |
Token | Lu |
Token ID | 25596 |
Feature activation | +2.07e-01 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
moral | -0.002 |
adies | -0.002 |
Fore | -0.002 |
Token | cy |
Token ID | 948 |
Feature activation | -3.18e-01 |
Pos logit contributions | |
tug | +0.004 |
whe | +0.004 |
Mine | +0.004 |
Carl | +0.004 |
skate | +0.004 |
Neg logit contributions | |
oral | -0.005 |
?'" | -0.005 |
?" | -0.005 |
invites | -0.005 |
adies | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +1.24e+00 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.007 |
moral | +0.007 |
adies | +0.007 |
?" | +0.007 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.008 |
Carl | -0.007 |
Mine | -0.007 |
fought | -0.007 |
Token | Mr |
Token ID | 1770 |
Feature activation | +9.09e-01 |
Pos logit contributions | |
tug | +0.034 |
whe | +0.034 |
iled | +0.033 |
Mine | +0.033 |
urt | +0.032 |
Neg logit contributions | |
moral | -0.022 |
?'" | -0.021 |
invites | -0.021 |
oral | -0.021 |
?" | -0.021 |
Token | . |
Token ID | 13 |
Feature activation | +4.05e+00 |
Pos logit contributions | |
tug | +0.024 |
whe | +0.023 |
skate | +0.022 |
Mine | +0.022 |
fought | +0.021 |
Neg logit contributions | |
?'" | -0.019 |
oral | -0.019 |
| -0.019 |
?" | -0.018 |
Him | -0.017 |
Token | Funny |
Token ID | 40473 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
tug | +0.095 |
whe | +0.091 |
fought | +0.086 |
arrass | +0.083 |
Mine | +0.082 |
Neg logit contributions | |
oral | -0.099 |
?'" | -0.098 |
| -0.094 |
?" | -0.093 |
moral | -0.092 |
Token | Face |
Token ID | 15399 |
Feature activation | +5.09e-01 |
Pos logit contributions | |
tug | +0.036 |
rum | +0.036 |
whe | +0.035 |
disl | +0.035 |
arrass | +0.035 |
Neg logit contributions | |
invites | -0.027 |
decides | -0.025 |
airport | -0.025 |
| -0.025 |
oral | -0.024 |
Token | played |
Token ID | 2826 |
Feature activation | +5.68e-01 |
Pos logit contributions | |
Carl | +0.014 |
whe | +0.014 |
Mine | +0.014 |
oa | +0.013 |
urt | +0.013 |
Neg logit contributions | |
oral | -0.009 |
invites | -0.009 |
moral | -0.009 |
decides | -0.009 |
?" | -0.009 |
Token | together |
Token ID | 1978 |
Feature activation | +5.65e-01 |
Pos logit contributions | |
cream | +0.015 |
arrass | +0.014 |
hy | +0.014 |
urt | +0.014 |
ared | +0.014 |
Neg logit contributions | |
?" | -0.012 |
| -0.011 |
Prin | -0.011 |
?'" | -0.010 |
moral | -0.010 |
Token | all |
Token ID | 477 |
Feature activation | -4.99e-02 |
Pos logit contributions | |
cream | +0.017 |
oa | +0.016 |
disl | +0.016 |
arrass | +0.016 |
Mine | +0.016 |
Neg logit contributions | |
moral | -0.010 |
?" | -0.009 |
?'" | -0.009 |
oral | -0.009 |
airport | -0.008 |
Token | rainbow |
Token ID | 27223 |
Feature activation | -2.62e-02 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.025 |
Carl | +0.024 |
Mine | +0.024 |
fought | +0.024 |
Neg logit contributions | |
oral | -0.022 |
?'" | -0.021 |
?" | -0.020 |
moral | -0.020 |
adies | -0.020 |
Token | . |
Token ID | 13 |
Feature activation | +5.42e-01 |
Pos logit contributions | |
?" | +0.001 |
oral | +0.001 |
?'" | +0.001 |
airport | +0.001 |
adies | +0.001 |
Neg logit contributions | |
tug | -0.001 |
arrass | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
whe | -0.001 |
Token | Lucy |
Token ID | 22162 |
Feature activation | -2.63e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.012 |
ci | +0.011 |
fought | +0.011 |
Carl | +0.011 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.013 |
?" | -0.013 |
invites | -0.012 |
adies | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
?" | +0.005 |
?'" | +0.005 |
oral | +0.005 |
invites | +0.005 |
decides | +0.005 |
Neg logit contributions | |
whe | -0.007 |
tug | -0.007 |
Mine | -0.006 |
arrass | -0.006 |
Carl | -0.006 |
Token | Mr |
Token ID | 1770 |
Feature activation | +9.64e-01 |
Pos logit contributions | |
tug | +0.037 |
whe | +0.036 |
iled | +0.034 |
Carl | +0.033 |
ci | +0.032 |
Neg logit contributions | |
?'" | -0.041 |
?" | -0.040 |
oral | -0.040 |
invites | -0.040 |
moral | -0.039 |
Token | . |
Token ID | 13 |
Feature activation | +4.05e+00 |
Pos logit contributions | |
tug | +0.025 |
whe | +0.024 |
Mine | +0.022 |
ci | +0.022 |
skate | +0.022 |
Neg logit contributions | |
| -0.020 |
oral | -0.020 |
?'" | -0.020 |
?" | -0.019 |
decides | -0.019 |
Token | Wh |
Token ID | 854 |
Feature activation | +1.35e+00 |
Pos logit contributions | |
tug | +0.091 |
whe | +0.090 |
ci | +0.088 |
cream | +0.087 |
believe | +0.085 |
Neg logit contributions | |
| -0.107 |
?'" | -0.091 |
oral | -0.088 |
?" | -0.088 |
moral | -0.088 |
Token | isk |
Token ID | 1984 |
Feature activation | +2.65e+00 |
Pos logit contributions | |
tug | +0.039 |
whe | +0.036 |
Mine | +0.034 |
Carl | +0.034 |
skate | +0.033 |
Neg logit contributions | |
oral | -0.027 |
?'" | -0.026 |
adies | -0.025 |
?" | -0.024 |
Fore | -0.024 |
Token | ers |
Token ID | 364 |
Feature activation | +5.89e-01 |
Pos logit contributions | |
tug | +0.071 |
whe | +0.070 |
Carl | +0.067 |
Mine | +0.066 |
fought | +0.065 |
Neg logit contributions | |
oral | -0.055 |
?'" | -0.053 |
adies | -0.050 |
moral | -0.050 |
?" | -0.050 |
Token | were |
Token ID | 547 |
Feature activation | -3.50e-01 |
Pos logit contributions | |
Mine | +0.017 |
Carl | +0.016 |
oa | +0.016 |
arrass | +0.016 |
tug | +0.016 |
Neg logit contributions | |
oral | -0.010 |
moral | -0.010 |
?'" | -0.010 |
decides | -0.010 |
invites | -0.010 |
Token | very |
Token ID | 845 |
Feature activation | +3.69e-01 |
Pos logit contributions | |
?" | +0.009 |
?'" | +0.008 |
oral | +0.008 |
moral | +0.008 |
invites | +0.008 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.007 |
Mine | -0.007 |
arrass | -0.007 |
Carl | -0.007 |
Token | butterflies |
Token ID | 44571 |
Feature activation | -1.23e+00 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.012 |
Fore | +0.011 |
adies | +0.010 |
?" | +0.010 |
Neg logit contributions | |
tug | -0.011 |
Carl | -0.010 |
whe | -0.010 |
fought | -0.010 |
Mine | -0.010 |
Token | . |
Token ID | 13 |
Feature activation | +2.06e-01 |
Pos logit contributions | |
?'" | +0.027 |
oral | +0.027 |
?" | +0.027 |
adies | +0.026 |
invites | +0.026 |
Neg logit contributions | |
tug | -0.030 |
whe | -0.029 |
Mine | -0.028 |
arrass | -0.028 |
Carl | -0.027 |
Token | Sally |
Token ID | 25737 |
Feature activation | -2.70e-01 |
Pos logit contributions | |
Mine | +0.004 |
Carl | +0.004 |
tug | +0.004 |
whe | +0.004 |
Kenny | +0.004 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.005 |
adies | -0.005 |
Fore | -0.005 |
moral | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +1.74e+00 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.006 |
adies | +0.006 |
Fore | +0.006 |
moral | +0.006 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.006 |
Carl | -0.006 |
fought | -0.006 |
Mine | -0.006 |
Token | Mr |
Token ID | 1770 |
Feature activation | +6.72e-01 |
Pos logit contributions | |
tug | +0.034 |
whe | +0.032 |
Carl | +0.032 |
Mine | +0.031 |
fought | +0.030 |
Neg logit contributions | |
oral | -0.050 |
?'" | -0.048 |
adies | -0.048 |
moral | -0.047 |
Fore | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | +4.05e+00 |
Pos logit contributions | |
tug | +0.016 |
whe | +0.016 |
Mine | +0.015 |
Carl | +0.014 |
fought | +0.014 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.015 |
?" | -0.015 |
| -0.015 |
adies | -0.015 |
Token | Bunny |
Token ID | 28683 |
Feature activation | +1.55e+00 |
Pos logit contributions | |
tug | +0.089 |
whe | +0.087 |
Mine | +0.080 |
fought | +0.080 |
Carl | +0.078 |
Neg logit contributions | |
oral | -0.103 |
?'" | -0.101 |
moral | -0.097 |
?" | -0.097 |
adies | -0.096 |
Token | played |
Token ID | 2826 |
Feature activation | +6.78e-01 |
Pos logit contributions | |
tug | +0.042 |
Mine | +0.042 |
whe | +0.042 |
ello | +0.040 |
arrass | +0.040 |
Neg logit contributions | |
invites | -0.030 |
decides | -0.029 |
?" | -0.026 |
?'" | -0.026 |
moral | -0.026 |
Token | in |
Token ID | 287 |
Feature activation | +2.28e-01 |
Pos logit contributions | |
tug | +0.015 |
Mine | +0.014 |
cream | +0.014 |
urt | +0.014 |
whe | +0.014 |
Neg logit contributions | |
?" | -0.017 |
adies | -0.016 |
invites | -0.016 |
?'" | -0.016 |
oral | -0.016 |
Token | the |
Token ID | 262 |
Feature activation | +6.90e-02 |
Pos logit contributions | |
whe | +0.006 |
tug | +0.006 |
urt | +0.006 |
Mine | +0.006 |
arrass | +0.006 |
Neg logit contributions | |
?" | -0.004 |
moral | -0.004 |
invites | -0.004 |
?'" | -0.004 |
oral | -0.004 |
Token | garden |
Token ID | 11376 |
Feature activation | -5.83e-01 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
ci | +0.002 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
Fore | -0.001 |
?" | -0.001 |
Token | - |
Token ID | 12 |
Feature activation | -3.02e-01 |
Pos logit contributions | |
oral | +0.089 |
?'" | +0.088 |
Fore | +0.087 |
Prin | +0.086 |
Him | +0.086 |
Neg logit contributions | |
tug | -0.065 |
whe | -0.057 |
fought | -0.054 |
Carl | -0.054 |
iled | -0.054 |
Token | two |
Token ID | 11545 |
Feature activation | -2.97e+00 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.006 |
adies | +0.006 |
moral | +0.006 |
?" | +0.006 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.008 |
Carl | -0.007 |
Mine | -0.007 |
fought | -0.007 |
Token | , |
Token ID | 11 |
Feature activation | -1.01e+00 |
Pos logit contributions | |
oral | +0.084 |
?'" | +0.081 |
Fore | +0.081 |
adies | +0.081 |
Him | +0.079 |
Neg logit contributions | |
tug | -0.057 |
whe | -0.052 |
Carl | -0.050 |
Mine | -0.049 |
fought | -0.048 |
Token | twenty |
Token ID | 8208 |
Feature activation | -2.74e+00 |
Pos logit contributions | |
oral | +0.027 |
adies | +0.025 |
?'" | +0.024 |
Fore | +0.024 |
Prin | +0.024 |
Neg logit contributions | |
tug | -0.023 |
Mine | -0.021 |
whe | -0.020 |
Carl | -0.020 |
skate | -0.019 |
Token | - |
Token ID | 12 |
Feature activation | +3.68e-02 |
Pos logit contributions | |
oral | +0.075 |
?'" | +0.075 |
Fore | +0.074 |
adies | +0.072 |
Prin | +0.072 |
Neg logit contributions | |
tug | -0.054 |
whe | -0.048 |
Carl | -0.045 |
urt | -0.045 |
fought | -0.045 |
Token | three |
Token ID | 15542 |
Feature activation | -4.26e+00 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
moral | -0.001 |
Fore | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -8.82e-01 |
Pos logit contributions | |
oral | +0.120 |
?'" | +0.115 |
Fore | +0.113 |
adies | +0.113 |
moral | +0.112 |
Neg logit contributions | |
tug | -0.085 |
whe | -0.078 |
Mine | -0.075 |
Carl | -0.074 |
fought | -0.072 |
Token | twenty |
Token ID | 8208 |
Feature activation | -2.95e+00 |
Pos logit contributions | |
oral | +0.023 |
adies | +0.021 |
?'" | +0.021 |
Fore | +0.021 |
Prin | +0.020 |
Neg logit contributions | |
tug | -0.021 |
Mine | -0.019 |
whe | -0.019 |
Carl | -0.018 |
skate | -0.017 |
Token | - |
Token ID | 12 |
Feature activation | -1.35e-01 |
Pos logit contributions | |
oral | +0.081 |
?'" | +0.080 |
Fore | +0.079 |
adies | +0.078 |
Prin | +0.077 |
Neg logit contributions | |
tug | -0.058 |
whe | -0.052 |
Carl | -0.049 |
fought | -0.049 |
urt | -0.049 |
Token | four |
Token ID | 14337 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
oral | +0.003 |
?'" | +0.003 |
adies | +0.003 |
moral | +0.003 |
Fore | +0.003 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.003 |
Carl | -0.003 |
Mine | -0.003 |
fought | -0.003 |
Token | , |
Token ID | 11 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
oral | +0.079 |
?'" | +0.077 |
Fore | +0.076 |
adies | +0.076 |
Him | +0.075 |
Neg logit contributions | |
tug | -0.056 |
whe | -0.050 |
Carl | -0.048 |
Mine | -0.048 |
fought | -0.047 |
Token | only |
Token ID | 691 |
Feature activation | -1.08e-01 |
Pos logit contributions | |
oral | +0.023 |
adies | +0.021 |
?'" | +0.021 |
Fore | +0.021 |
Prin | +0.020 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.016 |
Mine | -0.015 |
Carl | -0.015 |
fought | -0.015 |
Token | give |
Token ID | 1577 |
Feature activation | -5.95e-01 |
Pos logit contributions | |
oral | +0.003 |
adies | +0.002 |
?'" | +0.002 |
Fore | +0.002 |
moral | +0.002 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.002 |
Mine | -0.002 |
Carl | -0.002 |
fought | -0.002 |
Token | out |
Token ID | 503 |
Feature activation | -2.98e-01 |
Pos logit contributions | |
oral | +0.017 |
?'" | +0.016 |
adies | +0.015 |
moral | +0.015 |
Fore | +0.015 |
Neg logit contributions | |
tug | -0.012 |
Mine | -0.012 |
Carl | -0.011 |
whe | -0.011 |
cream | -0.011 |
Token | on |
Token ID | 319 |
Feature activation | +8.62e-02 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.007 |
adies | +0.007 |
moral | +0.007 |
Fore | +0.006 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.007 |
Mine | -0.007 |
fought | -0.006 |
Token | special |
Token ID | 2041 |
Feature activation | +4.80e-01 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
adies | -0.002 |
moral | -0.002 |
Fore | -0.002 |
Token | occasions |
Token ID | 12432 |
Feature activation | -3.99e+00 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.008 |
Mine | +0.007 |
ci | +0.007 |
skate | +0.007 |
Neg logit contributions | |
oral | -0.014 |
Fore | -0.014 |
?'" | -0.014 |
adies | -0.013 |
Prin | -0.013 |
Token | . |
Token ID | 13 |
Feature activation | -6.38e-01 |
Pos logit contributions | |
oral | +0.097 |
Fore | +0.091 |
moral | +0.089 |
Prin | +0.088 |
?'" | +0.088 |
Neg logit contributions | |
tug | -0.091 |
whe | -0.090 |
Mine | -0.086 |
urt | -0.083 |
iled | -0.083 |
Token | â |
Token ID | 22940 |
Feature activation | -3.25e-01 |
Pos logit contributions | |
oral | +0.017 |
adies | +0.016 |
moral | +0.015 |
Fore | +0.015 |
airport | +0.015 |
Neg logit contributions | |
Mine | -0.015 |
Carl | -0.014 |
whe | -0.014 |
Kenny | -0.014 |
Andy | -0.014 |
Token | € |
Token ID | 26391 |
Feature activation | -4.09e-01 |
Pos logit contributions | |
oral | +0.008 |
?'" | +0.008 |
?" | +0.007 |
moral | +0.007 |
adies | +0.007 |
Neg logit contributions | |
tug | -0.008 |
Carl | -0.008 |
whe | -0.007 |
Mine | -0.007 |
fought | -0.007 |
Token | The |
Token ID | 383 |
Feature activation | -9.26e-01 |
Pos logit contributions | |
oral | +0.009 |
moral | +0.008 |
adies | +0.008 |
?'" | +0.008 |
Fore | +0.008 |
Neg logit contributions | |
tug | -0.011 |
whe | -0.011 |
Mine | -0.010 |
iled | -0.010 |
Carl | -0.010 |
Token | kids |
Token ID | 3988 |
Feature activation | -1.21e+00 |
Pos logit contributions | |
oral | +0.018 |
?'" | +0.018 |
adies | +0.018 |
Fore | +0.017 |
invites | +0.017 |
Neg logit contributions | |
tug | -0.025 |
whe | -0.024 |
Mine | -0.023 |
Carl | -0.023 |
skate | -0.022 |
Token | - |
Token ID | 12 |
Feature activation | +6.02e-02 |
Pos logit contributions | |
oral | +0.074 |
?" | +0.069 |
moral | +0.069 |
?'" | +0.069 |
adies | +0.067 |
Neg logit contributions | |
whe | -0.062 |
tug | -0.061 |
Carl | -0.060 |
Mine | -0.060 |
Laser | -0.058 |
Token | two |
Token ID | 11545 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
tug | +0.002 |
Carl | +0.002 |
whe | +0.002 |
fought | +0.002 |
Kenny | +0.002 |
Neg logit contributions | |
oral | -0.001 |
adies | -0.001 |
?'" | -0.001 |
Fore | -0.001 |
Prin | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -7.32e-01 |
Pos logit contributions | |
oral | +0.070 |
?'" | +0.067 |
adies | +0.066 |
moral | +0.065 |
Fore | +0.065 |
Neg logit contributions | |
tug | -0.059 |
whe | -0.057 |
Carl | -0.054 |
Mine | -0.054 |
fought | -0.052 |
Token | thirty |
Token ID | 12277 |
Feature activation | -2.63e+00 |
Pos logit contributions | |
oral | +0.019 |
?'" | +0.018 |
adies | +0.018 |
Fore | +0.017 |
Prin | +0.017 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.016 |
Mine | -0.015 |
Carl | -0.015 |
skate | -0.014 |
Token | - |
Token ID | 12 |
Feature activation | +2.91e-01 |
Pos logit contributions | |
oral | +0.076 |
?'" | +0.072 |
moral | +0.071 |
?" | +0.071 |
adies | +0.071 |
Neg logit contributions | |
tug | -0.049 |
whe | -0.048 |
Carl | -0.046 |
Mine | -0.046 |
fought | -0.043 |
Token | three |
Token ID | 15542 |
Feature activation | -3.99e+00 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.008 |
Carl | +0.008 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.005 |
adies | -0.005 |
Fore | -0.005 |
moral | -0.005 |
Token | , |
Token ID | 11 |
Feature activation | -5.29e-01 |
Pos logit contributions | |
oral | +0.103 |
?'" | +0.099 |
adies | +0.096 |
moral | +0.096 |
Fore | +0.095 |
Neg logit contributions | |
tug | -0.087 |
whe | -0.084 |
Carl | -0.080 |
Mine | -0.080 |
fought | -0.077 |
Token | thirty |
Token ID | 12277 |
Feature activation | -2.85e+00 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.013 |
adies | +0.012 |
Fore | +0.012 |
?" | +0.012 |
Neg logit contributions | |
tug | -0.012 |
whe | -0.012 |
Mine | -0.011 |
Carl | -0.011 |
skate | -0.010 |
Token | - |
Token ID | 12 |
Feature activation | +6.67e-02 |
Pos logit contributions | |
oral | +0.081 |
?'" | +0.077 |
moral | +0.076 |
?" | +0.076 |
adies | +0.075 |
Neg logit contributions | |
tug | -0.055 |
whe | -0.054 |
Carl | -0.051 |
Mine | -0.051 |
fought | -0.049 |
Token | four |
Token ID | 14337 |
Feature activation | -2.78e+00 |
Pos logit contributions | |
tug | +0.002 |
Carl | +0.002 |
whe | +0.002 |
fought | +0.002 |
Mine | +0.002 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
Fore | -0.001 |
moral | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -1.14e+00 |
Pos logit contributions | |
oral | +0.076 |
?'" | +0.074 |
adies | +0.072 |
Fore | +0.072 |
Him | +0.071 |
Neg logit contributions | |
tug | -0.057 |
whe | -0.052 |
Carl | -0.050 |
Mine | -0.050 |
fought | -0.049 |
Token | - |
Token ID | 12 |
Feature activation | +9.18e-02 |
Pos logit contributions | |
oral | +0.070 |
?" | +0.066 |
moral | +0.065 |
?'" | +0.064 |
invites | +0.062 |
Neg logit contributions | |
whe | -0.058 |
Carl | -0.058 |
tug | -0.057 |
Laser | -0.056 |
Mine | -0.056 |
Token | two |
Token ID | 11545 |
Feature activation | -2.69e+00 |
Pos logit contributions | |
tug | +0.003 |
Carl | +0.002 |
fought | +0.002 |
ci | +0.002 |
whe | +0.002 |
Neg logit contributions | |
oral | -0.002 |
adies | -0.002 |
?'" | -0.002 |
Fore | -0.002 |
Prin | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | -7.48e-01 |
Pos logit contributions | |
oral | +0.069 |
?'" | +0.067 |
adies | +0.065 |
moral | +0.064 |
Fore | +0.064 |
Neg logit contributions | |
tug | -0.059 |
whe | -0.057 |
Carl | -0.054 |
Mine | -0.053 |
fought | -0.052 |
Token | forty |
Token ID | 16571 |
Feature activation | -2.56e+00 |
Pos logit contributions | |
oral | +0.019 |
adies | +0.019 |
?'" | +0.019 |
Fore | +0.017 |
Prin | +0.017 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.016 |
Mine | -0.016 |
Carl | -0.015 |
skate | -0.014 |
Token | - |
Token ID | 12 |
Feature activation | +3.83e-01 |
Pos logit contributions | |
oral | +0.074 |
moral | +0.069 |
?'" | +0.069 |
?" | +0.069 |
adies | +0.067 |
Neg logit contributions | |
tug | -0.048 |
whe | -0.048 |
Carl | -0.046 |
Mine | -0.046 |
Laser | -0.044 |
Token | three |
Token ID | 15542 |
Feature activation | -3.97e+00 |
Pos logit contributions | |
tug | +0.011 |
Carl | +0.010 |
whe | +0.010 |
fought | +0.010 |
Mine | +0.010 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
adies | -0.007 |
Fore | -0.007 |
moral | -0.007 |
Token | , |
Token ID | 11 |
Feature activation | -5.01e-01 |
Pos logit contributions | |
oral | +0.102 |
?'" | +0.098 |
adies | +0.096 |
moral | +0.095 |
Fore | +0.094 |
Neg logit contributions | |
tug | -0.087 |
whe | -0.084 |
Carl | -0.080 |
Mine | -0.079 |
fought | -0.077 |
Token | forty |
Token ID | 16571 |
Feature activation | -2.74e+00 |
Pos logit contributions | |
oral | +0.012 |
?'" | +0.012 |
adies | +0.012 |
Fore | +0.011 |
?" | +0.011 |
Neg logit contributions | |
tug | -0.012 |
whe | -0.012 |
Mine | -0.011 |
Carl | -0.011 |
skate | -0.010 |
Token | - |
Token ID | 12 |
Feature activation | +1.38e-01 |
Pos logit contributions | |
oral | +0.078 |
?'" | +0.073 |
moral | +0.073 |
?" | +0.073 |
adies | +0.071 |
Neg logit contributions | |
tug | -0.052 |
whe | -0.052 |
Carl | -0.051 |
Mine | -0.049 |
Laser | -0.048 |
Token | four |
Token ID | 14337 |
Feature activation | -2.79e+00 |
Pos logit contributions | |
tug | +0.004 |
Carl | +0.004 |
whe | +0.004 |
fought | +0.004 |
Mine | +0.004 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
adies | -0.002 |
Fore | -0.002 |
moral | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
oral | +0.076 |
?'" | +0.074 |
adies | +0.073 |
Fore | +0.072 |
Him | +0.071 |
Neg logit contributions | |
tug | -0.057 |
whe | -0.053 |
Mine | -0.050 |
Carl | -0.050 |
fought | -0.049 |
Token | bit |
Token ID | 1643 |
Feature activation | -3.91e-01 |
Pos logit contributions | |
oral | +0.020 |
moral | +0.019 |
?'" | +0.019 |
?" | +0.019 |
invites | +0.018 |
Neg logit contributions | |
fought | -0.016 |
whe | -0.015 |
iled | -0.015 |
tug | -0.015 |
arrass | -0.015 |
Token | more |
Token ID | 517 |
Feature activation | -1.50e-01 |
Pos logit contributions | |
?" | +0.007 |
moral | +0.007 |
?'" | +0.006 |
invites | +0.006 |
oral | +0.006 |
Neg logit contributions | |
oa | -0.012 |
Laser | -0.011 |
whe | -0.011 |
fought | -0.011 |
arrass | -0.011 |
Token | television |
Token ID | 5581 |
Feature activation | +2.49e-01 |
Pos logit contributions | |
?" | +0.003 |
invites | +0.003 |
moral | +0.003 |
oral | +0.003 |
?'" | +0.003 |
Neg logit contributions | |
whe | -0.004 |
oa | -0.004 |
Carl | -0.004 |
tug | -0.004 |
fought | -0.004 |
Token | on |
Token ID | 319 |
Feature activation | +2.26e-02 |
Pos logit contributions | |
tug | +0.007 |
arrass | +0.007 |
Carl | +0.007 |
oa | +0.007 |
Mine | +0.006 |
Neg logit contributions | |
?" | -0.005 |
?'" | -0.005 |
airport | -0.004 |
invites | -0.004 |
oral | -0.004 |
Token | special |
Token ID | 2041 |
Feature activation | +8.69e-01 |
Pos logit contributions | |
whe | +0.001 |
tug | +0.001 |
arrass | +0.001 |
iled | +0.001 |
fought | +0.001 |
Neg logit contributions | |
?'" | -0.000 |
?" | -0.000 |
oral | -0.000 |
moral | -0.000 |
airport | -0.000 |
Token | occasions |
Token ID | 12432 |
Feature activation | -3.90e+00 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.019 |
Carl | +0.019 |
Mine | +0.019 |
iled | +0.019 |
Neg logit contributions | |
oral | -0.021 |
?" | -0.021 |
invites | -0.020 |
?'" | -0.020 |
moral | -0.020 |
Token | . |
Token ID | 13 |
Feature activation | -2.08e-01 |
Pos logit contributions | |
oral | +0.094 |
?'" | +0.093 |
?" | +0.091 |
adies | +0.089 |
moral | +0.088 |
Neg logit contributions | |
tug | -0.091 |
whe | -0.086 |
Carl | -0.085 |
Mine | -0.083 |
fought | -0.080 |
Token | Tommy |
Token ID | 19919 |
Feature activation | -1.59e-01 |
Pos logit contributions | |
oral | +0.005 |
adies | +0.005 |
?'" | +0.005 |
moral | +0.005 |
Fore | +0.004 |
Neg logit contributions | |
tug | -0.005 |
whe | -0.005 |
Mine | -0.005 |
Carl | -0.005 |
Kenny | -0.005 |
Token | was |
Token ID | 373 |
Feature activation | +2.33e-01 |
Pos logit contributions | |
?" | +0.003 |
oral | +0.003 |
decides | +0.003 |
?'" | +0.003 |
invites | +0.003 |
Neg logit contributions | |
whe | -0.004 |
Mine | -0.004 |
tug | -0.004 |
Carl | -0.004 |
arrass | -0.004 |
Token | so |
Token ID | 523 |
Feature activation | +5.65e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.005 |
Carl | +0.005 |
urt | +0.005 |
Mine | +0.005 |
Neg logit contributions | |
?" | -0.006 |
?'" | -0.006 |
moral | -0.006 |
invites | -0.005 |
oral | -0.005 |
Token | happy |
Token ID | 3772 |
Feature activation | +1.38e+00 |
Pos logit contributions | |
whe | +0.013 |
tug | +0.013 |
Mine | +0.012 |
Carl | +0.012 |
Laser | +0.012 |
Neg logit contributions | |
?" | -0.013 |
invites | -0.013 |
oral | -0.013 |
moral | -0.013 |
?'" | -0.013 |
Token | - |
Token ID | 12 |
Feature activation | +3.28e-01 |
Pos logit contributions | |
oral | +0.045 |
?'" | +0.042 |
Fore | +0.042 |
moral | +0.042 |
adies | +0.041 |
Neg logit contributions | |
tug | -0.036 |
whe | -0.032 |
Carl | -0.031 |
Mine | -0.031 |
fought | -0.031 |
Token | two |
Token ID | 11545 |
Feature activation | -2.41e+00 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.008 |
Carl | +0.007 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.007 |
adies | -0.007 |
moral | -0.007 |
Fore | -0.007 |
Token | , |
Token ID | 11 |
Feature activation | -7.17e-02 |
Pos logit contributions | |
oral | +0.068 |
Fore | +0.066 |
adies | +0.065 |
Him | +0.065 |
?'" | +0.065 |
Neg logit contributions | |
tug | -0.047 |
whe | -0.043 |
Carl | -0.040 |
fought | -0.040 |
Mine | -0.039 |
Token | thirty |
Token ID | 12277 |
Feature activation | -2.36e+00 |
Pos logit contributions | |
oral | +0.002 |
?'" | +0.002 |
Fore | +0.002 |
adies | +0.002 |
invites | +0.002 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Mine | -0.002 |
Carl | -0.002 |
fought | -0.002 |
Token | - |
Token ID | 12 |
Feature activation | +4.26e-01 |
Pos logit contributions | |
?'" | +0.066 |
oral | +0.066 |
adies | +0.066 |
Prin | +0.065 |
Fore | +0.065 |
Neg logit contributions | |
tug | -0.044 |
whe | -0.040 |
fought | -0.038 |
urt | -0.038 |
iled | -0.037 |
Token | three |
Token ID | 15542 |
Feature activation | -3.84e+00 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.011 |
Mine | +0.011 |
Carl | +0.011 |
fought | +0.010 |
Neg logit contributions | |
oral | -0.009 |
?'" | -0.008 |
moral | -0.008 |
adies | -0.008 |
invites | -0.008 |
Token | , |
Token ID | 11 |
Feature activation | +1.46e-01 |
Pos logit contributions | |
oral | +0.113 |
Him | +0.111 |
Fore | +0.109 |
moral | +0.106 |
?'" | +0.106 |
Neg logit contributions | |
tug | -0.074 |
iled | -0.061 |
whe | -0.061 |
Mine | -0.061 |
fought | -0.060 |
Token | thirty |
Token ID | 12277 |
Feature activation | -2.64e+00 |
Pos logit contributions | |
tug | +0.004 |
whe | +0.004 |
Mine | +0.003 |
Carl | +0.003 |
fought | +0.003 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
adies | -0.003 |
Fore | -0.003 |
moral | -0.003 |
Token | - |
Token ID | 12 |
Feature activation | +1.27e-01 |
Pos logit contributions | |
oral | +0.070 |
?'" | +0.069 |
adies | +0.069 |
Fore | +0.068 |
Prin | +0.067 |
Neg logit contributions | |
tug | -0.054 |
whe | -0.049 |
urt | -0.047 |
fought | -0.046 |
iled | -0.046 |
Token | four |
Token ID | 14337 |
Feature activation | -2.93e+00 |
Pos logit contributions | |
tug | +0.003 |
whe | +0.003 |
Mine | +0.003 |
Carl | +0.003 |
fought | +0.003 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
moral | -0.002 |
adies | -0.002 |
invites | -0.002 |
Token | ," |
Token ID | 553 |
Feature activation | -1.24e-01 |
Pos logit contributions | |
Him | +0.094 |
airport | +0.092 |
Fore | +0.091 |
oral | +0.090 |
Prin | +0.089 |
Neg logit contributions | |
tug | -0.048 |
iled | -0.039 |
Carl | -0.037 |
fought | -0.036 |
whe | -0.034 |
Token | 't |
Token ID | 470 |
Feature activation | +2.27e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
decides | -0.001 |
invites | -0.001 |
Token | know |
Token ID | 760 |
Feature activation | -4.32e-01 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.006 |
Mine | +0.006 |
ci | +0.006 |
oa | +0.006 |
Neg logit contributions | |
?" | -0.005 |
?'" | -0.004 |
adies | -0.004 |
moral | -0.004 |
oral | -0.004 |
Token | , |
Token ID | 11 |
Feature activation | -6.87e-02 |
Pos logit contributions | |
oral | +0.012 |
?'" | +0.011 |
moral | +0.011 |
adies | +0.011 |
Fore | +0.011 |
Neg logit contributions | |
tug | -0.009 |
whe | -0.009 |
Carl | -0.008 |
Mine | -0.008 |
fought | -0.008 |
Token | Anna |
Token ID | 11735 |
Feature activation | -1.61e+00 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
?" | +0.001 |
moral | +0.001 |
adies | +0.001 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
ci | -0.002 |
Carl | -0.002 |
Mine | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | +3.67e-01 |
Pos logit contributions | |
oral | +0.045 |
?'" | +0.043 |
adies | +0.042 |
moral | +0.042 |
Fore | +0.042 |
Neg logit contributions | |
tug | -0.032 |
whe | -0.030 |
Carl | -0.029 |
Mine | -0.029 |
fought | -0.029 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.84e+00 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.009 |
Mine | +0.009 |
urt | +0.009 |
arrass | +0.009 |
Neg logit contributions | |
?" | -0.008 |
?'" | -0.008 |
adies | -0.007 |
oral | -0.007 |
moral | -0.007 |
Token | it |
Token ID | 340 |
Feature activation | +3.25e-02 |
Pos logit contributions | |
oral | +0.117 |
Prin | +0.107 |
Fore | +0.104 |
?'" | +0.102 |
adies | +0.100 |
Neg logit contributions | |
tug | -0.075 |
Carl | -0.070 |
Andy | -0.069 |
fought | -0.066 |
Mine | -0.065 |
Token | 's |
Token ID | 338 |
Feature activation | +6.39e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
fought | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
Him | -0.001 |
Fore | -0.001 |
Token | an |
Token ID | 281 |
Feature activation | -5.13e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.012 |
Carl | +0.011 |
Mine | +0.011 |
fought | +0.011 |
Neg logit contributions | |
oral | -0.018 |
?'" | -0.017 |
adies | -0.017 |
moral | -0.017 |
invites | -0.017 |
Token | earthquake |
Token ID | 16295 |
Feature activation | -1.14e-01 |
Pos logit contributions | |
oral | +0.016 |
?'" | +0.015 |
Fore | +0.015 |
invites | +0.015 |
| +0.014 |
Neg logit contributions | |
tug | -0.009 |
whe | -0.008 |
Carl | -0.008 |
fought | -0.008 |
Mine | -0.008 |
Token | !" |
Token ID | 2474 |
Feature activation | -6.12e-01 |
Pos logit contributions | |
oral | +0.003 |
Fore | +0.003 |
adies | +0.003 |
?'" | +0.003 |
moral | +0.003 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.003 |
fought | -0.002 |
Carl | -0.002 |
Mine | -0.002 |
Token | " |
Token ID | 366 |
Feature activation | -5.23e-02 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.011 |
Carl | +0.011 |
Mine | +0.011 |
fought | +0.010 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
moral | -0.006 |
adies | -0.006 |
?" | -0.006 |
Token | Okay |
Token ID | 16454 |
Feature activation | -1.84e+00 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
adies | +0.001 |
?" | +0.001 |
moral | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Mine | -0.001 |
Carl | -0.001 |
fought | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -3.71e-01 |
Pos logit contributions | |
?" | +0.042 |
?'" | +0.041 |
oral | +0.041 |
adies | +0.040 |
moral | +0.039 |
Neg logit contributions | |
tug | -0.045 |
whe | -0.045 |
fought | -0.042 |
Mine | -0.041 |
Carl | -0.041 |
Token | mom |
Token ID | 1995 |
Feature activation | -1.87e+00 |
Pos logit contributions | |
oral | +0.009 |
?'" | +0.008 |
Fore | +0.008 |
adies | +0.008 |
Prin | +0.008 |
Neg logit contributions | |
tug | -0.009 |
Mine | -0.009 |
Carl | -0.009 |
whe | -0.008 |
skate | -0.008 |
Token | , |
Token ID | 11 |
Feature activation | -1.26e-01 |
Pos logit contributions | |
oral | +0.047 |
?'" | +0.045 |
adies | +0.044 |
moral | +0.044 |
Fore | +0.043 |
Neg logit contributions | |
tug | -0.042 |
whe | -0.041 |
Carl | -0.039 |
Mine | -0.039 |
fought | -0.038 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.76e+00 |
Pos logit contributions | |
oral | +0.002 |
?'" | +0.002 |
adies | +0.002 |
moral | +0.002 |
Fore | +0.002 |
Neg logit contributions | |
tug | -0.004 |
whe | -0.004 |
Carl | -0.004 |
Mine | -0.004 |
fought | -0.003 |
Token | I |
Token ID | 314 |
Feature activation | -2.21e+00 |
Pos logit contributions | |
oral | +0.101 |
?'" | +0.091 |
Fore | +0.091 |
Prin | +0.091 |
adies | +0.090 |
Neg logit contributions | |
tug | -0.083 |
Carl | -0.078 |
Mine | -0.077 |
whe | -0.077 |
skate | -0.074 |
Token | will |
Token ID | 481 |
Feature activation | -1.09e+00 |
Pos logit contributions | |
oral | +0.038 |
?'" | +0.036 |
adies | +0.035 |
Fore | +0.034 |
Him | +0.033 |
Neg logit contributions | |
tug | -0.069 |
whe | -0.065 |
Mine | -0.063 |
Carl | -0.063 |
fought | -0.063 |
Token | try |
Token ID | 1949 |
Feature activation | -4.13e-01 |
Pos logit contributions | |
oral | +0.027 |
?'" | +0.025 |
adies | +0.025 |
Fore | +0.025 |
moral | +0.025 |
Neg logit contributions | |
tug | -0.026 |
whe | -0.024 |
Mine | -0.023 |
Carl | -0.023 |
fought | -0.023 |
Token | that |
Token ID | 326 |
Feature activation | -8.44e-01 |
Pos logit contributions | |
oral | +0.012 |
adies | +0.011 |
Fore | +0.011 |
?'" | +0.010 |
moral | +0.010 |
Neg logit contributions | |
tug | -0.009 |
whe | -0.008 |
Carl | -0.008 |
Mine | -0.008 |
skate | -0.008 |
Token | . |
Token ID | 13 |
Feature activation | -3.70e-01 |
Pos logit contributions | |
oral | +0.022 |
adies | +0.020 |
Fore | +0.020 |
?'" | +0.020 |
Prin | +0.019 |
Neg logit contributions | |
tug | -0.019 |
whe | -0.018 |
Carl | -0.018 |
Mine | -0.018 |
baseball | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | -5.76e-01 |
Pos logit contributions | |
Carl | +0.002 |
Tim | +0.002 |
Mix | +0.002 |
Mine | +0.002 |
Andy | +0.002 |
Neg logit contributions | |
oral | -0.002 |
airport | -0.002 |
adies | -0.002 |
decides | -0.002 |
embassy | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | -7.49e-01 |
Pos logit contributions | |
adies | +0.016 |
decides | +0.016 |
airport | +0.015 |
bride | +0.015 |
embassy | +0.015 |
Neg logit contributions | |
Andy | -0.011 |
Carl | -0.011 |
Tim | -0.011 |
Laser | -0.011 |
Kenny | -0.010 |
Token | " |
Token ID | 1 |
Feature activation | +7.48e-01 |
Pos logit contributions | |
oral | +0.016 |
?'" | +0.015 |
adies | +0.015 |
invites | +0.015 |
Him | +0.014 |
Neg logit contributions | |
tug | -0.020 |
iled | -0.018 |
whe | -0.018 |
Carl | -0.018 |
Mine | -0.018 |
Token | Yes |
Token ID | 5297 |
Feature activation | -8.42e-01 |
Pos logit contributions | |
tug | +0.021 |
whe | +0.020 |
Carl | +0.020 |
Mine | +0.019 |
urt | +0.019 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.014 |
moral | -0.014 |
Fore | -0.013 |
adies | -0.013 |
Token | , |
Token ID | 11 |
Feature activation | -6.49e-01 |
Pos logit contributions | |
oral | +0.029 |
Fore | +0.028 |
Prin | +0.027 |
meets | +0.027 |
decides | +0.027 |
Neg logit contributions | |
tug | -0.012 |
Carl | -0.011 |
Andy | -0.010 |
Tim | -0.010 |
Mine | -0.010 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.71e+00 |
Pos logit contributions | |
oral | +0.017 |
?'" | +0.017 |
adies | +0.016 |
Fore | +0.016 |
invites | +0.016 |
Neg logit contributions | |
tug | -0.014 |
Carl | -0.013 |
whe | -0.013 |
Mine | -0.013 |
fought | -0.012 |
Token | a |
Token ID | 257 |
Feature activation | -1.73e+00 |
Pos logit contributions | |
oral | +0.119 |
Prin | +0.111 |
Fore | +0.108 |
?'" | +0.107 |
adies | +0.103 |
Neg logit contributions | |
tug | -0.061 |
Andy | -0.056 |
Carl | -0.056 |
fought | -0.055 |
Tim | -0.053 |
Token | bad |
Token ID | 2089 |
Feature activation | +8.76e-02 |
Pos logit contributions | |
oral | +0.044 |
adies | +0.043 |
?'" | +0.043 |
Him | +0.041 |
Fore | +0.040 |
Neg logit contributions | |
tug | -0.040 |
Carl | -0.035 |
baseball | -0.034 |
skate | -0.034 |
Mine | -0.034 |
Token | sheep |
Token ID | 15900 |
Feature activation | -1.04e+00 |
Pos logit contributions | |
tug | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
arrass | +0.002 |
whe | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
Fore | -0.002 |
adies | -0.002 |
Prin | -0.002 |
Token | who |
Token ID | 508 |
Feature activation | -2.65e-02 |
Pos logit contributions | |
oral | +0.022 |
Fore | +0.020 |
adies | +0.020 |
?'" | +0.020 |
moral | +0.020 |
Neg logit contributions | |
tug | -0.027 |
whe | -0.026 |
fought | -0.026 |
Carl | -0.025 |
Mine | -0.025 |
Token | was |
Token ID | 373 |
Feature activation | +2.48e-01 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.000 |
adies | +0.000 |
moral | +0.000 |
Fore | +0.000 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Token | said |
Token ID | 531 |
Feature activation | +2.93e-01 |
Pos logit contributions | |
tug | +0.011 |
fought | +0.010 |
whe | +0.010 |
Mine | +0.010 |
Carl | +0.010 |
Neg logit contributions | |
oral | -0.017 |
adies | -0.016 |
?'" | -0.016 |
Him | -0.016 |
Fore | -0.015 |
Token | , |
Token ID | 11 |
Feature activation | +7.55e-01 |
Pos logit contributions | |
tug | +0.005 |
Carl | +0.005 |
whe | +0.004 |
Mine | +0.004 |
fought | +0.004 |
Neg logit contributions | |
oral | -0.009 |
?'" | -0.009 |
Fore | -0.009 |
airport | -0.008 |
adies | -0.008 |
Token | " |
Token ID | 366 |
Feature activation | +4.35e-01 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.020 |
Carl | +0.019 |
Mine | +0.019 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.015 |
adies | -0.015 |
Fore | -0.014 |
moral | -0.014 |
Token | Okay |
Token ID | 16454 |
Feature activation | -1.61e+00 |
Pos logit contributions | |
tug | +0.010 |
whe | +0.010 |
Carl | +0.009 |
Mine | +0.009 |
fought | +0.009 |
Neg logit contributions | |
oral | -0.011 |
?'" | -0.010 |
moral | -0.010 |
adies | -0.010 |
Fore | -0.010 |
Token | , |
Token ID | 11 |
Feature activation | -8.79e-02 |
Pos logit contributions | |
oral | +0.046 |
Fore | +0.043 |
?'" | +0.043 |
adies | +0.042 |
moral | +0.042 |
Neg logit contributions | |
tug | -0.032 |
whe | -0.030 |
Carl | -0.029 |
Mine | -0.029 |
fought | -0.027 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.71e+00 |
Pos logit contributions | |
oral | +0.002 |
moral | +0.002 |
?'" | +0.002 |
adies | +0.002 |
?" | +0.002 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
fought | -0.002 |
Mine | -0.002 |
Carl | -0.002 |
Token | we |
Token ID | 356 |
Feature activation | -9.01e-01 |
Pos logit contributions | |
oral | +0.110 |
Prin | +0.101 |
Fore | +0.099 |
?'" | +0.097 |
adies | +0.097 |
Neg logit contributions | |
tug | -0.070 |
Mine | -0.065 |
Carl | -0.065 |
whe | -0.062 |
fought | -0.061 |
Token | can |
Token ID | 460 |
Feature activation | -9.90e-01 |
Pos logit contributions | |
oral | +0.014 |
?'" | +0.014 |
adies | +0.013 |
moral | +0.013 |
?" | +0.013 |
Neg logit contributions | |
tug | -0.029 |
whe | -0.028 |
Carl | -0.027 |
Mine | -0.027 |
fought | -0.026 |
Token | . |
Token ID | 13 |
Feature activation | -1.44e-01 |
Pos logit contributions | |
oral | +0.025 |
?'" | +0.025 |
?" | +0.024 |
moral | +0.024 |
invites | +0.023 |
Neg logit contributions | |
tug | -0.022 |
whe | -0.022 |
Carl | -0.021 |
arrass | -0.020 |
Mine | -0.020 |
Token | Do |
Token ID | 2141 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
oral | +0.004 |
?'" | +0.003 |
adies | +0.003 |
Fore | +0.003 |
moral | +0.003 |
Neg logit contributions | |
tug | -0.003 |
Mine | -0.003 |
whe | -0.003 |
Carl | -0.003 |
arrass | -0.003 |
Token | you |
Token ID | 345 |
Feature activation | +1.05e-02 |
Pos logit contributions | |
oral | +0.029 |
?'" | +0.029 |
adies | +0.028 |
airport | +0.027 |
invites | +0.027 |
Neg logit contributions | |
tug | -0.019 |
Carl | -0.019 |
Mine | -0.019 |
whe | -0.019 |
oa | -0.018 |
Token | - |
Token ID | 12 |
Feature activation | +7.46e-01 |
Pos logit contributions | |
oral | +0.070 |
moral | +0.066 |
?" | +0.065 |
?'" | +0.065 |
invites | +0.063 |
Neg logit contributions | |
tug | -0.048 |
Carl | -0.048 |
whe | -0.047 |
Mine | -0.046 |
Laser | -0.045 |
Token | two |
Token ID | 11545 |
Feature activation | -2.67e+00 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.019 |
Carl | +0.019 |
Mine | +0.019 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.014 |
adies | -0.014 |
moral | -0.014 |
Fore | -0.014 |
Token | , |
Token ID | 11 |
Feature activation | -7.64e-01 |
Pos logit contributions | |
oral | +0.069 |
?'" | +0.066 |
moral | +0.064 |
adies | +0.064 |
?" | +0.064 |
Neg logit contributions | |
tug | -0.058 |
whe | -0.057 |
Carl | -0.054 |
Mine | -0.054 |
fought | -0.052 |
Token | forty |
Token ID | 16571 |
Feature activation | -2.42e+00 |
Pos logit contributions | |
oral | +0.020 |
adies | +0.019 |
?'" | +0.019 |
Fore | +0.018 |
Prin | +0.018 |
Neg logit contributions | |
tug | -0.018 |
whe | -0.017 |
Mine | -0.016 |
Carl | -0.015 |
skate | -0.015 |
Token | - |
Token ID | 12 |
Feature activation | +8.74e-01 |
Pos logit contributions | |
oral | +0.071 |
moral | +0.066 |
?'" | +0.066 |
?" | +0.066 |
adies | +0.064 |
Neg logit contributions | |
tug | -0.045 |
whe | -0.043 |
Carl | -0.043 |
Mine | -0.042 |
Laser | -0.040 |
Token | three |
Token ID | 15542 |
Feature activation | -3.70e+00 |
Pos logit contributions | |
tug | +0.024 |
whe | +0.023 |
Carl | +0.022 |
Mine | +0.022 |
fought | +0.022 |
Neg logit contributions | |
oral | -0.018 |
?'" | -0.017 |
adies | -0.017 |
moral | -0.016 |
Fore | -0.016 |
Token | , |
Token ID | 11 |
Feature activation | -5.18e-01 |
Pos logit contributions | |
oral | +0.093 |
?'" | +0.090 |
?" | +0.087 |
adies | +0.087 |
moral | +0.087 |
Neg logit contributions | |
tug | -0.082 |
whe | -0.080 |
Carl | -0.077 |
Mine | -0.075 |
fought | -0.074 |
Token | forty |
Token ID | 16571 |
Feature activation | -2.54e+00 |
Pos logit contributions | |
oral | +0.013 |
adies | +0.013 |
?'" | +0.012 |
Fore | +0.012 |
Prin | +0.012 |
Neg logit contributions | |
tug | -0.012 |
whe | -0.012 |
Mine | -0.011 |
Carl | -0.010 |
skate | -0.010 |
Token | - |
Token ID | 12 |
Feature activation | +7.33e-01 |
Pos logit contributions | |
oral | +0.074 |
?'" | +0.070 |
moral | +0.070 |
?" | +0.069 |
adies | +0.068 |
Neg logit contributions | |
tug | -0.047 |
whe | -0.045 |
Carl | -0.045 |
Mine | -0.043 |
fought | -0.042 |
Token | four |
Token ID | 14337 |
Feature activation | -2.62e+00 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.019 |
Carl | +0.019 |
Mine | +0.018 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.014 |
adies | -0.014 |
moral | -0.014 |
Fore | -0.014 |
Token | , |
Token ID | 11 |
Feature activation | -1.09e+00 |
Pos logit contributions | |
oral | +0.069 |
?'" | +0.066 |
adies | +0.065 |
Fore | +0.064 |
moral | +0.064 |
Neg logit contributions | |
tug | -0.056 |
whe | -0.054 |
Carl | -0.051 |
Mine | -0.051 |
fought | -0.049 |
Token | cream |
Token ID | 8566 |
Feature activation | -7.91e-01 |
Pos logit contributions | |
oral | +0.048 |
?'" | +0.047 |
adies | +0.046 |
?" | +0.046 |
moral | +0.046 |
Neg logit contributions | |
tug | -0.022 |
whe | -0.021 |
Carl | -0.019 |
Mine | -0.019 |
fought | -0.018 |
Token | are |
Token ID | 389 |
Feature activation | -3.48e-01 |
Pos logit contributions | |
oral | +0.015 |
?'" | +0.015 |
?" | +0.015 |
moral | +0.014 |
invites | +0.014 |
Neg logit contributions | |
tug | -0.022 |
whe | -0.021 |
Carl | -0.021 |
Mine | -0.020 |
arrass | -0.020 |
Token | treats |
Token ID | 18432 |
Feature activation | -6.36e-01 |
Pos logit contributions | |
oral | +0.009 |
?" | +0.009 |
?'" | +0.009 |
moral | +0.009 |
Fore | +0.008 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.007 |
Mine | -0.007 |
arrass | -0.007 |
Token | for |
Token ID | 329 |
Feature activation | +4.61e-01 |
Pos logit contributions | |
?" | +0.012 |
?'" | +0.011 |
oral | +0.011 |
invites | +0.010 |
moral | +0.010 |
Neg logit contributions | |
tug | -0.018 |
whe | -0.018 |
Carl | -0.018 |
arrass | -0.017 |
Kenny | -0.017 |
Token | special |
Token ID | 2041 |
Feature activation | -1.78e-01 |
Pos logit contributions | |
tug | +0.011 |
Carl | +0.010 |
Mine | +0.010 |
whe | +0.010 |
fought | +0.009 |
Neg logit contributions | |
oral | -0.012 |
adies | -0.011 |
?'" | -0.011 |
decides | -0.011 |
Fore | -0.011 |
Token | occasions |
Token ID | 12432 |
Feature activation | -3.70e+00 |
Pos logit contributions | |
oral | +0.005 |
?'" | +0.005 |
adies | +0.005 |
Fore | +0.005 |
moral | +0.004 |
Neg logit contributions | |
tug | -0.004 |
whe | -0.004 |
Mine | -0.003 |
fought | -0.003 |
Carl | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -5.27e-01 |
Pos logit contributions | |
oral | +0.090 |
?'" | +0.085 |
adies | +0.083 |
moral | +0.083 |
Fore | +0.083 |
Neg logit contributions | |
tug | -0.086 |
whe | -0.083 |
Mine | -0.079 |
Carl | -0.078 |
fought | -0.077 |
Token | We |
Token ID | 775 |
Feature activation | -9.15e-01 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.012 |
adies | +0.012 |
Fore | +0.012 |
moral | +0.012 |
Neg logit contributions | |
Mine | -0.012 |
tug | -0.012 |
whe | -0.012 |
Carl | -0.012 |
iled | -0.011 |
Token | cannot |
Token ID | 2314 |
Feature activation | -4.38e-02 |
Pos logit contributions | |
oral | +0.017 |
?" | +0.016 |
?'" | +0.016 |
moral | +0.016 |
decides | +0.015 |
Neg logit contributions | |
whe | -0.026 |
tug | -0.026 |
Carl | -0.025 |
Mine | -0.025 |
arrass | -0.024 |
Token | have |
Token ID | 423 |
Feature activation | +6.89e-02 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
?" | +0.001 |
moral | +0.001 |
adies | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Token | them |
Token ID | 606 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.001 |
Mine | +0.001 |
Carl | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
adies | -0.002 |
moral | -0.002 |
Fore | -0.002 |
Token | the |
Token ID | 262 |
Feature activation | -6.14e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.005 |
Carl | +0.004 |
Mine | +0.004 |
fought | +0.004 |
Neg logit contributions | |
oral | -0.004 |
?'" | -0.003 |
adies | -0.003 |
moral | -0.003 |
?" | -0.003 |
Token | time |
Token ID | 640 |
Feature activation | -1.40e+00 |
Pos logit contributions | |
adies | +0.012 |
?'" | +0.012 |
oral | +0.011 |
Him | +0.011 |
Prin | +0.011 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.017 |
Mine | -0.016 |
fought | -0.015 |
trumpet | -0.015 |
Token | and |
Token ID | 290 |
Feature activation | -1.40e-01 |
Pos logit contributions | |
oral | +0.036 |
?'" | +0.035 |
adies | +0.034 |
moral | +0.034 |
?" | +0.034 |
Neg logit contributions | |
tug | -0.030 |
whe | -0.029 |
Carl | -0.028 |
Mine | -0.028 |
fought | -0.027 |
Token | for |
Token ID | 329 |
Feature activation | +2.24e-01 |
Pos logit contributions | |
oral | +0.004 |
?'" | +0.004 |
?" | +0.003 |
moral | +0.003 |
adies | +0.003 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.003 |
Carl | -0.003 |
Mine | -0.003 |
urt | -0.003 |
Token | special |
Token ID | 2041 |
Feature activation | +2.68e-01 |
Pos logit contributions | |
whe | +0.005 |
tug | +0.005 |
Carl | +0.005 |
Mine | +0.005 |
ci | +0.005 |
Neg logit contributions | |
oral | -0.005 |
?'" | -0.005 |
moral | -0.005 |
?" | -0.005 |
Fore | -0.005 |
Token | occasions |
Token ID | 12432 |
Feature activation | -3.68e+00 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.005 |
Mine | +0.005 |
Carl | +0.005 |
fought | +0.005 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
adies | -0.007 |
Fore | -0.007 |
moral | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | -6.76e-01 |
Pos logit contributions | |
oral | +0.100 |
?'" | +0.096 |
adies | +0.094 |
Fore | +0.094 |
moral | +0.093 |
Neg logit contributions | |
tug | -0.075 |
whe | -0.072 |
Mine | -0.068 |
Carl | -0.066 |
fought | -0.065 |
Token | You |
Token ID | 921 |
Feature activation | -5.29e-01 |
Pos logit contributions | |
oral | +0.020 |
airport | +0.018 |
moral | +0.018 |
?'" | +0.018 |
adies | +0.018 |
Neg logit contributions | |
Mine | -0.013 |
whe | -0.011 |
Carl | -0.011 |
Laser | -0.011 |
tug | -0.011 |
Token | have |
Token ID | 423 |
Feature activation | -5.16e-01 |
Pos logit contributions | |
oral | +0.010 |
?'" | +0.010 |
moral | +0.010 |
?" | +0.010 |
adies | +0.010 |
Neg logit contributions | |
tug | -0.015 |
whe | -0.014 |
Carl | -0.014 |
Mine | -0.014 |
arrass | -0.013 |
Token | to |
Token ID | 284 |
Feature activation | +3.16e-01 |
Pos logit contributions | |
oral | +0.016 |
?'" | +0.015 |
adies | +0.015 |
moral | +0.014 |
Fore | +0.014 |
Neg logit contributions | |
tug | -0.009 |
whe | -0.009 |
Mine | -0.008 |
fought | -0.008 |
Carl | -0.008 |
Token | ask |
Token ID | 1265 |
Feature activation | -5.84e-01 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.007 |
Carl | +0.007 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
?" | -0.007 |
moral | -0.007 |
adies | -0.007 |
Token | " |
Token ID | 366 |
Feature activation | +5.21e-02 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.009 |
Carl | +0.008 |
Mine | +0.008 |
fought | +0.008 |
Neg logit contributions | |
oral | -0.008 |
adies | -0.007 |
?'" | -0.007 |
Fore | -0.007 |
invites | -0.007 |
Token | OK |
Token ID | 11380 |
Feature activation | -1.34e+00 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Mine | +0.001 |
Carl | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
moral | -0.001 |
?" | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -2.45e-01 |
Pos logit contributions | |
oral | +0.035 |
?'" | +0.034 |
adies | +0.033 |
moral | +0.033 |
Fore | +0.032 |
Neg logit contributions | |
tug | -0.029 |
whe | -0.028 |
Carl | -0.027 |
Mine | -0.027 |
fought | -0.026 |
Token | mom |
Token ID | 1995 |
Feature activation | -1.81e+00 |
Pos logit contributions | |
oral | +0.005 |
?'" | +0.005 |
moral | +0.005 |
?" | +0.005 |
adies | +0.005 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.006 |
fought | -0.006 |
Mine | -0.006 |
Token | , |
Token ID | 11 |
Feature activation | -2.56e-01 |
Pos logit contributions | |
oral | +0.045 |
?'" | +0.043 |
adies | +0.042 |
moral | +0.042 |
Fore | +0.042 |
Neg logit contributions | |
tug | -0.041 |
whe | -0.040 |
Carl | -0.038 |
Mine | -0.038 |
fought | -0.037 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.67e+00 |
Pos logit contributions | |
oral | +0.005 |
?'" | +0.004 |
adies | +0.004 |
moral | +0.004 |
Fore | +0.004 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.007 |
Carl | -0.007 |
Mine | -0.007 |
fought | -0.007 |
Token | I |
Token ID | 314 |
Feature activation | -2.04e+00 |
Pos logit contributions | |
oral | +0.111 |
Prin | +0.101 |
Fore | +0.098 |
?'" | +0.097 |
adies | +0.097 |
Neg logit contributions | |
tug | -0.069 |
Mine | -0.065 |
Carl | -0.063 |
fought | -0.060 |
Tim | -0.060 |
Token | can |
Token ID | 460 |
Feature activation | -9.75e-01 |
Pos logit contributions | |
oral | +0.031 |
?'" | +0.029 |
moral | +0.028 |
?" | +0.027 |
adies | +0.027 |
Neg logit contributions | |
tug | -0.066 |
whe | -0.065 |
Carl | -0.063 |
Mine | -0.062 |
arrass | -0.061 |
Token | make |
Token ID | 787 |
Feature activation | +3.14e-01 |
Pos logit contributions | |
?" | +0.022 |
?'" | +0.022 |
oral | +0.022 |
moral | +0.021 |
invites | +0.021 |
Neg logit contributions | |
tug | -0.023 |
whe | -0.023 |
Carl | -0.022 |
arrass | -0.022 |
Mine | -0.022 |
Token | a |
Token ID | 257 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.008 |
fought | +0.007 |
Carl | +0.007 |
Mine | +0.007 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
?" | -0.007 |
moral | -0.006 |
adies | -0.006 |
Token | bear |
Token ID | 6842 |
Feature activation | -3.23e-01 |
Pos logit contributions | |
oral | +0.028 |
?'" | +0.027 |
moral | +0.026 |
?" | +0.026 |
adies | +0.026 |
Neg logit contributions | |
tug | -0.028 |
whe | -0.027 |
Carl | -0.026 |
Mine | -0.026 |
fought | -0.025 |
Token | 's |
Token ID | 338 |
Feature activation | +6.52e-01 |
Pos logit contributions | |
oral | +0.032 |
adies | +0.030 |
?'" | +0.029 |
Him | +0.029 |
Fore | +0.028 |
Neg logit contributions | |
tug | -0.016 |
Mine | -0.014 |
Carl | -0.014 |
Andy | -0.013 |
arrass | -0.013 |
Token | go |
Token ID | 467 |
Feature activation | -1.53e+00 |
Pos logit contributions | |
whe | +0.017 |
tug | +0.017 |
Mine | +0.016 |
Carl | +0.016 |
arrass | +0.016 |
Neg logit contributions | |
?" | -0.014 |
?'" | -0.013 |
oral | -0.013 |
invites | -0.013 |
moral | -0.012 |
Token | , |
Token ID | 11 |
Feature activation | +9.00e-02 |
Pos logit contributions | |
oral | +0.042 |
?'" | +0.038 |
Fore | +0.038 |
moral | +0.038 |
adies | +0.037 |
Neg logit contributions | |
tug | -0.033 |
whe | -0.032 |
Carl | -0.031 |
skate | -0.029 |
Mine | -0.029 |
Token | Sara |
Token ID | 24799 |
Feature activation | -2.04e+00 |
Pos logit contributions | |
tug | +0.003 |
Carl | +0.002 |
whe | +0.002 |
Mine | +0.002 |
arrass | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
Fore | -0.002 |
adies | -0.002 |
moral | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | +8.02e-01 |
Pos logit contributions | |
?" | +0.043 |
?'" | +0.042 |
oral | +0.041 |
invites | +0.040 |
adies | +0.039 |
Neg logit contributions | |
tug | -0.054 |
whe | -0.054 |
Mine | -0.051 |
Carl | -0.050 |
fought | -0.048 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.65e+00 |
Pos logit contributions | |
tug | +0.019 |
whe | +0.018 |
Carl | +0.017 |
Mine | +0.017 |
fought | +0.016 |
Neg logit contributions | |
oral | -0.020 |
?'" | -0.019 |
adies | -0.018 |
moral | -0.018 |
Fore | -0.018 |
Token | we |
Token ID | 356 |
Feature activation | -1.43e+00 |
Pos logit contributions | |
oral | +0.113 |
Prin | +0.102 |
Fore | +0.099 |
?'" | +0.098 |
moral | +0.095 |
Neg logit contributions | |
tug | -0.070 |
fought | -0.063 |
Carl | -0.061 |
Andy | -0.061 |
Mine | -0.059 |
Token | will |
Token ID | 481 |
Feature activation | -1.25e+00 |
Pos logit contributions | |
oral | +0.029 |
?'" | +0.027 |
adies | +0.026 |
Fore | +0.026 |
moral | +0.025 |
Neg logit contributions | |
tug | -0.041 |
fought | -0.038 |
Carl | -0.038 |
whe | -0.038 |
Mine | -0.037 |
Token | find |
Token ID | 1064 |
Feature activation | -1.70e+00 |
Pos logit contributions | |
oral | +0.028 |
Fore | +0.026 |
adies | +0.025 |
?'" | +0.025 |
moral | +0.024 |
Neg logit contributions | |
tug | -0.034 |
whe | -0.031 |
Carl | -0.030 |
fought | -0.030 |
Mine | -0.030 |
Token | gold |
Token ID | 3869 |
Feature activation | -8.93e-01 |
Pos logit contributions | |
oral | +0.051 |
?'" | +0.047 |
moral | +0.045 |
Prin | +0.045 |
Fore | +0.043 |
Neg logit contributions | |
tug | -0.034 |
Carl | -0.031 |
Mine | -0.030 |
Andy | -0.029 |
Laser | -0.029 |
Token | and |
Token ID | 290 |
Feature activation | -6.10e-01 |
Pos logit contributions | |
oral | +0.023 |
?'" | +0.022 |
moral | +0.021 |
Fore | +0.021 |
adies | +0.021 |
Neg logit contributions | |
tug | -0.019 |
whe | -0.019 |
Mine | -0.018 |
Carl | -0.018 |
fought | -0.017 |
Token | allows |
Token ID | 3578 |
Feature activation | +2.70e-01 |
Pos logit contributions | |
oral | +0.028 |
?'" | +0.027 |
adies | +0.026 |
moral | +0.026 |
Fore | +0.026 |
Neg logit contributions | |
tug | -0.023 |
whe | -0.022 |
Carl | -0.021 |
Mine | -0.021 |
fought | -0.021 |
Token | us |
Token ID | 514 |
Feature activation | +1.07e-01 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.007 |
Carl | +0.007 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.005 |
?'" | -0.005 |
adies | -0.005 |
moral | -0.005 |
?" | -0.005 |
Token | to |
Token ID | 284 |
Feature activation | +1.87e-01 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.002 |
Mine | +0.002 |
Carl | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
?" | -0.003 |
adies | -0.003 |
moral | -0.003 |
Token | mark |
Token ID | 1317 |
Feature activation | -4.80e-01 |
Pos logit contributions | |
tug | +0.004 |
whe | +0.004 |
Carl | +0.004 |
Mine | +0.004 |
fought | +0.004 |
Neg logit contributions | |
oral | -0.005 |
?'" | -0.004 |
adies | -0.004 |
Fore | -0.004 |
moral | -0.004 |
Token | special |
Token ID | 2041 |
Feature activation | +4.37e-01 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.012 |
Fore | +0.012 |
adies | +0.011 |
moral | +0.011 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.010 |
Carl | -0.010 |
Mine | -0.009 |
Andy | -0.009 |
Token | occasions |
Token ID | 12432 |
Feature activation | -3.64e+00 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.009 |
Carl | +0.009 |
Mine | +0.009 |
fought | +0.008 |
Neg logit contributions | |
oral | -0.011 |
?'" | -0.011 |
adies | -0.011 |
Fore | -0.011 |
moral | -0.011 |
Token | ". |
Token ID | 1911 |
Feature activation | +2.29e-01 |
Pos logit contributions | |
oral | +0.085 |
?'" | +0.079 |
Fore | +0.078 |
adies | +0.077 |
moral | +0.077 |
Neg logit contributions | |
tug | -0.089 |
whe | -0.087 |
Mine | -0.083 |
Carl | -0.082 |
fought | -0.080 |
Token | John |
Token ID | 1757 |
Feature activation | -3.26e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.005 |
Carl | +0.005 |
Mine | +0.005 |
fought | +0.005 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.005 |
adies | -0.005 |
moral | -0.005 |
?" | -0.005 |
Token | felt |
Token ID | 2936 |
Feature activation | -6.49e-01 |
Pos logit contributions | |
oral | +0.006 |
moral | +0.006 |
?'" | +0.006 |
?" | +0.006 |
decides | +0.006 |
Neg logit contributions | |
whe | -0.009 |
Mine | -0.009 |
Carl | -0.008 |
Laser | -0.008 |
tug | -0.008 |
Token | a |
Token ID | 257 |
Feature activation | -7.51e-01 |
Pos logit contributions | |
oral | +0.019 |
?'" | +0.018 |
adies | +0.018 |
Fore | +0.017 |
moral | +0.017 |
Neg logit contributions | |
tug | -0.013 |
whe | -0.012 |
Mine | -0.011 |
Carl | -0.011 |
fought | -0.011 |
Token | little |
Token ID | 1310 |
Feature activation | -9.37e-01 |
Pos logit contributions | |
oral | +0.023 |
?'" | +0.022 |
moral | +0.022 |
Fore | +0.021 |
?" | +0.021 |
Neg logit contributions | |
whe | -0.013 |
Carl | -0.012 |
tug | -0.012 |
Mine | -0.012 |
fought | -0.011 |
Token | Maybe |
Token ID | 6674 |
Feature activation | -3.31e+00 |
Pos logit contributions | |
oral | +0.004 |
?'" | +0.003 |
adies | +0.003 |
moral | +0.003 |
?" | +0.003 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.005 |
Carl | -0.005 |
Mine | -0.005 |
fought | -0.005 |
Token | a |
Token ID | 257 |
Feature activation | -1.62e+00 |
Pos logit contributions | |
oral | +0.101 |
Prin | +0.092 |
?'" | +0.091 |
Fore | +0.089 |
adies | +0.088 |
Neg logit contributions | |
tug | -0.063 |
Carl | -0.060 |
Andy | -0.058 |
Mine | -0.056 |
whe | -0.055 |
Token | few |
Token ID | 1178 |
Feature activation | -2.89e-01 |
Pos logit contributions | |
oral | +0.041 |
?'" | +0.040 |
adies | +0.039 |
Fore | +0.037 |
Him | +0.037 |
Neg logit contributions | |
tug | -0.037 |
whe | -0.034 |
Mine | -0.033 |
Carl | -0.033 |
fought | -0.032 |
Token | minutes |
Token ID | 2431 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.007 |
Fore | +0.006 |
adies | +0.006 |
moral | +0.006 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.006 |
fought | -0.006 |
Mine | -0.006 |
Token | , |
Token ID | 11 |
Feature activation | -7.15e-01 |
Pos logit contributions | |
oral | +0.032 |
Fore | +0.030 |
?'" | +0.029 |
moral | +0.029 |
Prin | +0.029 |
Neg logit contributions | |
tug | -0.025 |
whe | -0.023 |
Carl | -0.023 |
fought | -0.023 |
Mine | -0.022 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.64e+00 |
Pos logit contributions | |
oral | +0.017 |
?'" | +0.016 |
adies | +0.016 |
moral | +0.016 |
Fore | +0.015 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.017 |
Carl | -0.016 |
Mine | -0.016 |
fought | -0.015 |
Token | a |
Token ID | 257 |
Feature activation | -1.73e+00 |
Pos logit contributions | |
oral | +0.109 |
Prin | +0.099 |
Fore | +0.097 |
?'" | +0.096 |
adies | +0.095 |
Neg logit contributions | |
tug | -0.072 |
Carl | -0.064 |
Andy | -0.062 |
Mine | -0.062 |
fought | -0.061 |
Token | long |
Token ID | 890 |
Feature activation | -2.42e-01 |
Pos logit contributions | |
oral | +0.044 |
?'" | +0.043 |
adies | +0.042 |
Fore | +0.040 |
moral | +0.040 |
Neg logit contributions | |
tug | -0.039 |
whe | -0.037 |
Mine | -0.035 |
Carl | -0.035 |
fought | -0.034 |
Token | time |
Token ID | 640 |
Feature activation | -2.24e+00 |
Pos logit contributions | |
oral | +0.005 |
?'" | +0.005 |
adies | +0.005 |
Fore | +0.005 |
moral | +0.005 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.006 |
Mine | -0.006 |
fought | -0.006 |
Carl | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | -3.48e-01 |
Pos logit contributions | |
oral | +0.057 |
?'" | +0.054 |
adies | +0.053 |
moral | +0.053 |
?" | +0.052 |
Neg logit contributions | |
tug | -0.050 |
whe | -0.049 |
Carl | -0.046 |
Mine | -0.046 |
fought | -0.045 |
Token | It |
Token ID | 632 |
Feature activation | +7.97e-01 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.006 |
adies | +0.006 |
Fore | +0.006 |
moral | +0.006 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.010 |
Mine | -0.009 |
Carl | -0.009 |
Andy | -0.009 |
Token | the |
Token ID | 262 |
Feature activation | -5.65e-01 |
Pos logit contributions | |
oral | +0.003 |
adies | +0.003 |
?'" | +0.003 |
decides | +0.003 |
Fore | +0.003 |
Neg logit contributions | |
tug | -0.003 |
Mine | -0.002 |
whe | -0.002 |
Carl | -0.002 |
fought | -0.002 |
Token | dish |
Token ID | 9433 |
Feature activation | -7.41e-02 |
Pos logit contributions | |
oral | +0.011 |
?'" | +0.010 |
adies | +0.010 |
moral | +0.010 |
Fore | +0.010 |
Neg logit contributions | |
tug | -0.016 |
whe | -0.016 |
Mine | -0.015 |
Carl | -0.015 |
fought | -0.015 |
Token | washer |
Token ID | 45146 |
Feature activation | -2.72e-01 |
Pos logit contributions | |
oral | +0.002 |
adies | +0.002 |
Fore | +0.002 |
moral | +0.002 |
?'" | +0.002 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Mine | -0.001 |
ci | -0.001 |
Carl | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | +7.80e-02 |
Pos logit contributions | |
oral | +0.007 |
adies | +0.007 |
Fore | +0.007 |
?'" | +0.006 |
moral | +0.006 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.006 |
Mine | -0.006 |
fought | -0.006 |
Carl | -0.005 |
Token | or |
Token ID | 393 |
Feature activation | +1.06e-01 |
Pos logit contributions | |
tug | +0.002 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.002 |
adies | -0.002 |
Fore | -0.002 |
moral | -0.002 |
Token | maybe |
Token ID | 3863 |
Feature activation | -3.62e+00 |
Pos logit contributions | |
tug | +0.002 |
Carl | +0.002 |
whe | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.003 |
adies | -0.003 |
Fore | -0.003 |
?'" | -0.003 |
invites | -0.003 |
Token | daddy |
Token ID | 35822 |
Feature activation | -9.67e-01 |
Pos logit contributions | |
oral | +0.119 |
Prin | +0.108 |
Fore | +0.107 |
adies | +0.106 |
abby | +0.101 |
Neg logit contributions | |
tug | -0.061 |
Mine | -0.054 |
Tim | -0.052 |
Carl | -0.051 |
baseball | -0.050 |
Token | took |
Token ID | 1718 |
Feature activation | -7.23e-01 |
Pos logit contributions | |
oral | +0.020 |
?'" | +0.020 |
adies | +0.019 |
moral | +0.019 |
?" | +0.019 |
Neg logit contributions | |
tug | -0.026 |
whe | -0.025 |
Carl | -0.024 |
Mine | -0.024 |
fought | -0.023 |
Token | it |
Token ID | 340 |
Feature activation | -1.83e-01 |
Pos logit contributions | |
oral | +0.016 |
?'" | +0.015 |
adies | +0.015 |
moral | +0.015 |
Fore | +0.015 |
Neg logit contributions | |
tug | -0.019 |
whe | -0.018 |
Mine | -0.017 |
Carl | -0.017 |
fought | -0.017 |
Token | to |
Token ID | 284 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
oral | +0.005 |
Fore | +0.005 |
adies | +0.005 |
?'" | +0.005 |
invites | +0.005 |
Neg logit contributions | |
tug | -0.004 |
whe | -0.004 |
Mine | -0.003 |
Carl | -0.003 |
fought | -0.003 |
Token | work |
Token ID | 670 |
Feature activation | -1.65e-01 |
Pos logit contributions | |
oral | +0.034 |
adies | +0.033 |
Fore | +0.032 |
decides | +0.031 |
narrative | +0.031 |
Neg logit contributions | |
tug | -0.018 |
Carl | -0.015 |
baseball | -0.015 |
Tim | -0.015 |
Mine | -0.015 |
Token | and |
Token ID | 290 |
Feature activation | -3.32e-01 |
Pos logit contributions | |
tug | +0.010 |
whe | +0.010 |
Carl | +0.009 |
Mine | +0.009 |
arrass | +0.009 |
Neg logit contributions | |
oral | -0.009 |
?'" | -0.008 |
moral | -0.008 |
invites | -0.008 |
?" | -0.008 |
Token | washed |
Token ID | 18989 |
Feature activation | -1.72e-01 |
Pos logit contributions | |
oral | +0.007 |
invites | +0.007 |
moral | +0.007 |
?'" | +0.007 |
?" | +0.007 |
Neg logit contributions | |
whe | -0.008 |
tug | -0.008 |
Carl | -0.007 |
Mine | -0.007 |
arrass | -0.007 |
Token | it |
Token ID | 340 |
Feature activation | +9.40e-02 |
Pos logit contributions | |
?" | +0.003 |
?'" | +0.003 |
invites | +0.003 |
| +0.003 |
moral | +0.003 |
Neg logit contributions | |
whe | -0.005 |
tug | -0.005 |
Carl | -0.004 |
fought | -0.004 |
skate | -0.004 |
Token | for |
Token ID | 329 |
Feature activation | +5.75e-01 |
Pos logit contributions | |
whe | +0.002 |
Mine | +0.002 |
Kenny | +0.002 |
Carl | +0.002 |
ello | +0.002 |
Neg logit contributions | |
?" | -0.002 |
moral | -0.002 |
invites | -0.002 |
?'" | -0.002 |
Him | -0.002 |
Token | special |
Token ID | 2041 |
Feature activation | +5.22e-01 |
Pos logit contributions | |
whe | +0.015 |
ci | +0.015 |
tug | +0.014 |
Carl | +0.014 |
arrass | +0.014 |
Neg logit contributions | |
moral | -0.011 |
oral | -0.011 |
Fore | -0.011 |
?" | -0.011 |
Him | -0.011 |
Token | occasions |
Token ID | 12432 |
Feature activation | -3.60e+00 |
Pos logit contributions | |
Carl | +0.013 |
whe | +0.012 |
arrass | +0.012 |
oa | +0.012 |
tug | +0.012 |
Neg logit contributions | |
oral | -0.012 |
?" | -0.012 |
invites | -0.011 |
moral | -0.011 |
?'" | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +2.50e-01 |
Pos logit contributions | |
?" | +0.078 |
?'" | +0.078 |
oral | +0.076 |
adies | +0.073 |
moral | +0.071 |
Neg logit contributions | |
tug | -0.094 |
Carl | -0.091 |
whe | -0.090 |
Mine | -0.087 |
fought | -0.084 |
Token | |
Token ID | 220 |
Feature activation | -4.42e-01 |
Pos logit contributions | |
Carl | +0.006 |
whe | +0.006 |
Mine | +0.006 |
tug | +0.006 |
Andy | +0.006 |
Neg logit contributions | |
oral | -0.006 |
adies | -0.005 |
?'" | -0.005 |
moral | -0.005 |
Fore | -0.005 |
Token |
|
Token ID | 198 |
Feature activation | +2.56e-01 |
Pos logit contributions | |
airport | +0.012 |
decides | +0.012 |
oral | +0.012 |
adies | +0.011 |
embassy | +0.011 |
Neg logit contributions | |
whe | -0.010 |
Andy | -0.009 |
Carl | -0.009 |
Tim | -0.009 |
Kenny | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | +2.77e-01 |
Pos logit contributions | |
whe | +0.006 |
Carl | +0.006 |
tug | +0.006 |
Andy | +0.006 |
Kenny | +0.006 |
Neg logit contributions | |
decides | -0.006 |
adies | -0.006 |
oral | -0.006 |
airport | -0.006 |
narrative | -0.005 |
Token | And |
Token ID | 1870 |
Feature activation | +1.42e+00 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.010 |
Carl | +0.010 |
Mine | +0.010 |
ci | +0.010 |
Neg logit contributions | |
oral | -0.003 |
moral | -0.002 |
?'" | -0.002 |
Fore | -0.002 |
adies | -0.002 |
Token | " |
Token ID | 1 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
tug | +0.022 |
whe | +0.021 |
Mine | +0.020 |
Carl | +0.020 |
fought | +0.020 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.015 |
adies | -0.015 |
moral | -0.014 |
Fore | -0.014 |
Token | and |
Token ID | 290 |
Feature activation | -3.98e-01 |
Pos logit contributions | |
whe | +0.021 |
Mine | +0.019 |
tug | +0.019 |
Carl | +0.017 |
fought | +0.017 |
Neg logit contributions | |
oral | -0.028 |
adies | -0.027 |
Fore | -0.026 |
?'" | -0.026 |
invites | -0.026 |
Token | the |
Token ID | 262 |
Feature activation | -1.29e+00 |
Pos logit contributions | |
oral | +0.010 |
adies | +0.009 |
Fore | +0.009 |
?'" | +0.009 |
moral | +0.008 |
Neg logit contributions | |
tug | -0.010 |
Mine | -0.009 |
whe | -0.009 |
Carl | -0.009 |
fought | -0.009 |
Token | number |
Token ID | 1271 |
Feature activation | -4.13e-01 |
Pos logit contributions | |
oral | +0.028 |
moral | +0.027 |
?'" | +0.026 |
?" | +0.025 |
invites | +0.025 |
Neg logit contributions | |
tug | -0.033 |
whe | -0.032 |
iled | -0.031 |
Carl | -0.031 |
fought | -0.031 |
Token | " |
Token ID | 366 |
Feature activation | -1.50e-01 |
Pos logit contributions | |
oral | +0.009 |
?'" | +0.009 |
?" | +0.009 |
moral | +0.009 |
adies | +0.009 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.010 |
Carl | -0.010 |
Mine | -0.009 |
fought | -0.009 |
Token | three |
Token ID | 15542 |
Feature activation | -3.59e+00 |
Pos logit contributions | |
oral | +0.004 |
adies | +0.004 |
?'" | +0.004 |
invites | +0.003 |
moral | +0.003 |
Neg logit contributions | |
whe | -0.003 |
tug | -0.003 |
Mine | -0.003 |
fought | -0.003 |
Carl | -0.003 |
Token | ". |
Token ID | 1911 |
Feature activation | -5.44e-01 |
Pos logit contributions | |
oral | +0.070 |
Fore | +0.065 |
adies | +0.065 |
?'" | +0.064 |
moral | +0.063 |
Neg logit contributions | |
tug | -0.100 |
Mine | -0.095 |
whe | -0.095 |
iled | -0.091 |
fought | -0.089 |
Token | She |
Token ID | 1375 |
Feature activation | +7.20e-02 |
Pos logit contributions | |
oral | +0.014 |
Fore | +0.013 |
adies | +0.013 |
?'" | +0.013 |
moral | +0.012 |
Neg logit contributions | |
tug | -0.013 |
Mine | -0.012 |
whe | -0.012 |
Carl | -0.012 |
arrass | -0.011 |
Token | says |
Token ID | 1139 |
Feature activation | -2.55e-01 |
Pos logit contributions | |
tug | +0.002 |
Mine | +0.002 |
whe | +0.002 |
fought | +0.002 |
Carl | +0.002 |
Neg logit contributions | |
oral | -0.002 |
?'" | -0.001 |
adies | -0.001 |
Fore | -0.001 |
Him | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | +3.07e-01 |
Pos logit contributions | |
oral | +0.006 |
?'" | +0.005 |
adies | +0.005 |
Fore | +0.005 |
moral | +0.005 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.006 |
Mine | -0.006 |
Carl | -0.006 |
fought | -0.006 |
Token | " |
Token ID | 366 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.008 |
Mine | +0.008 |
Carl | +0.008 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.006 |
adies | -0.006 |
Fore | -0.006 |
moral | -0.006 |
Token | But |
Token ID | 887 |
Feature activation | -5.86e-02 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
moral | -0.001 |
Fore | -0.001 |
Token | it |
Token ID | 340 |
Feature activation | +5.25e-01 |
Pos logit contributions | |
?" | +0.001 |
moral | +0.001 |
oral | +0.001 |
?'" | +0.001 |
airport | +0.001 |
Neg logit contributions | |
whe | -0.002 |
tug | -0.002 |
Mine | -0.002 |
cream | -0.002 |
arrass | -0.002 |
Token | was |
Token ID | 373 |
Feature activation | +6.27e-01 |
Pos logit contributions | |
whe | +0.017 |
Mine | +0.017 |
tug | +0.016 |
urt | +0.016 |
oa | +0.016 |
Neg logit contributions | |
moral | -0.008 |
oral | -0.007 |
?" | -0.007 |
Fore | -0.007 |
decides | -0.007 |
Token | not |
Token ID | 407 |
Feature activation | +7.11e-01 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.011 |
Mine | +0.010 |
Carl | +0.010 |
urt | +0.010 |
Neg logit contributions | |
oral | -0.018 |
moral | -0.018 |
?'" | -0.018 |
?" | -0.018 |
Fore | -0.017 |
Token | fun |
Token ID | 1257 |
Feature activation | +5.33e-01 |
Pos logit contributions | |
tug | +0.016 |
whe | +0.016 |
Carl | +0.015 |
Mine | +0.015 |
oa | +0.014 |
Neg logit contributions | |
?" | -0.018 |
?'" | -0.017 |
moral | -0.017 |
oral | -0.017 |
adies | -0.017 |
Token | at |
Token ID | 379 |
Feature activation | +3.11e+00 |
Pos logit contributions | |
tug | +0.017 |
cream | +0.016 |
Carl | +0.016 |
oa | +0.016 |
ked | +0.016 |
Neg logit contributions | |
?" | -0.009 |
invites | -0.008 |
airport | -0.007 |
decides | -0.007 |
adies | -0.007 |
Token | all |
Token ID | 477 |
Feature activation | +5.49e-01 |
Pos logit contributions | |
tug | +0.080 |
whe | +0.076 |
iled | +0.075 |
Mine | +0.075 |
arrass | +0.074 |
Neg logit contributions | |
?" | -0.067 |
oral | -0.066 |
moral | -0.065 |
?'" | -0.064 |
invites | -0.061 |
Token | . |
Token ID | 13 |
Feature activation | -8.47e-02 |
Pos logit contributions | |
tug | +0.016 |
whe | +0.014 |
Mine | +0.014 |
ked | +0.014 |
oa | +0.014 |
Neg logit contributions | |
?" | -0.011 |
invites | -0.011 |
oral | -0.010 |
?'" | -0.010 |
moral | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | -2.75e-01 |
Pos logit contributions | |
oral | +0.002 |
?'" | +0.002 |
adies | +0.002 |
Fore | +0.002 |
moral | +0.002 |
Neg logit contributions | |
tug | -0.002 |
Carl | -0.002 |
whe | -0.002 |
Mine | -0.002 |
Andy | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | -2.14e-01 |
Pos logit contributions | |
adies | +0.007 |
decides | +0.007 |
airport | +0.007 |
?'" | +0.006 |
embassy | +0.006 |
Neg logit contributions | |
Carl | -0.006 |
tug | -0.006 |
Andy | -0.006 |
Kenny | -0.006 |
whe | -0.006 |
Token | The |
Token ID | 464 |
Feature activation | -1.36e+00 |
Pos logit contributions | |
oral | +0.004 |
?'" | +0.004 |
adies | +0.004 |
?" | +0.004 |
invites | +0.004 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.006 |
Carl | -0.005 |
Mine | -0.005 |
fought | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +1.20e+00 |
Pos logit contributions | |
tug | +0.009 |
whe | +0.009 |
Mine | +0.008 |
Carl | +0.008 |
arrass | +0.008 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
moral | -0.007 |
?" | -0.007 |
adies | -0.007 |
Token | his |
Token ID | 465 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
whe | +0.030 |
tug | +0.029 |
Mine | +0.028 |
iled | +0.027 |
urt | +0.026 |
Neg logit contributions | |
oral | -0.025 |
moral | -0.025 |
?'" | -0.025 |
invites | -0.024 |
?" | -0.024 |
Token | friends |
Token ID | 2460 |
Feature activation | +4.89e-01 |
Pos logit contributions | |
whe | +0.043 |
tug | +0.041 |
Mine | +0.040 |
arrass | +0.040 |
iled | +0.039 |
Neg logit contributions | |
oral | -0.026 |
moral | -0.026 |
airport | -0.023 |
invites | -0.023 |
?" | -0.023 |
Token | had |
Token ID | 550 |
Feature activation | +2.07e-01 |
Pos logit contributions | |
tug | +0.012 |
whe | +0.011 |
Mine | +0.011 |
Carl | +0.011 |
arrass | +0.010 |
Neg logit contributions | |
oral | -0.011 |
?'" | -0.011 |
?" | -0.011 |
adies | -0.011 |
moral | -0.011 |
Token | a |
Token ID | 257 |
Feature activation | +9.87e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.005 |
Carl | +0.004 |
Mine | +0.004 |
ci | +0.004 |
Neg logit contributions | |
oral | -0.005 |
?'" | -0.005 |
Fore | -0.005 |
moral | -0.005 |
invites | -0.005 |
Token | wonderful |
Token ID | 7932 |
Feature activation | +3.09e+00 |
Pos logit contributions | |
tug | +0.021 |
whe | +0.020 |
Carl | +0.019 |
Mine | +0.019 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.026 |
?'" | -0.025 |
adies | -0.025 |
moral | -0.025 |
?" | -0.024 |
Token | time |
Token ID | 640 |
Feature activation | -2.77e-01 |
Pos logit contributions | |
tug | +0.089 |
whe | +0.086 |
Carl | +0.083 |
Mine | +0.082 |
fought | +0.081 |
Neg logit contributions | |
oral | -0.059 |
?'" | -0.056 |
adies | -0.054 |
moral | -0.053 |
Fore | -0.053 |
Token | soaring |
Token ID | 32376 |
Feature activation | +9.60e-01 |
Pos logit contributions | |
?" | +0.006 |
oral | +0.006 |
adies | +0.006 |
?'" | +0.006 |
| +0.006 |
Neg logit contributions | |
Mine | -0.006 |
arrass | -0.006 |
tug | -0.006 |
oa | -0.006 |
whe | -0.006 |
Token | and |
Token ID | 290 |
Feature activation | +3.65e-01 |
Pos logit contributions | |
tug | +0.018 |
whe | +0.018 |
Carl | +0.017 |
fought | +0.016 |
Mine | +0.016 |
Neg logit contributions | |
oral | -0.028 |
?'" | -0.027 |
adies | -0.026 |
moral | -0.026 |
invites | -0.026 |
Token | they |
Token ID | 484 |
Feature activation | +1.49e-01 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.007 |
Mine | +0.007 |
arrass | +0.007 |
Carl | +0.007 |
Neg logit contributions | |
oral | -0.009 |
?'" | -0.009 |
invites | -0.009 |
adies | -0.009 |
Fore | -0.009 |
Token | were |
Token ID | 547 |
Feature activation | +9.58e-02 |
Pos logit contributions | |
tug | +0.004 |
whe | +0.003 |
Carl | +0.003 |
Mine | +0.003 |
fought | +0.003 |
Neg logit contributions | |
oral | -0.004 |
?'" | -0.003 |
adies | -0.003 |
moral | -0.003 |
Fore | -0.003 |
Token | many |
Token ID | 867 |
Feature activation | +3.26e-01 |
Pos logit contributions | |
oral | +0.003 |
?'" | +0.003 |
moral | +0.003 |
adies | +0.003 |
?" | +0.003 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Carl | -0.002 |
Mine | -0.002 |
fought | -0.002 |
Token | people |
Token ID | 661 |
Feature activation | -2.80e-01 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.008 |
Carl | +0.007 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.007 |
moral | -0.007 |
adies | -0.007 |
?" | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | +4.24e-01 |
Pos logit contributions | |
?'" | +0.006 |
oral | +0.006 |
?" | +0.006 |
invites | +0.006 |
moral | +0.006 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.006 |
Mine | -0.006 |
arrass | -0.006 |
Token | The |
Token ID | 383 |
Feature activation | -3.51e-01 |
Pos logit contributions | |
Carl | +0.011 |
Mine | +0.011 |
whe | +0.011 |
tug | +0.011 |
Andy | +0.011 |
Neg logit contributions | |
adies | -0.009 |
oral | -0.009 |
?'" | -0.008 |
moral | -0.008 |
Fore | -0.008 |
Token | moral |
Token ID | 6573 |
Feature activation | -9.08e-01 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.007 |
adies | +0.007 |
Fore | +0.006 |
?" | +0.006 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.009 |
Mine | -0.009 |
Carl | -0.009 |
fought | -0.009 |
Token | of |
Token ID | 286 |
Feature activation | +3.01e+00 |
Pos logit contributions | |
?'" | +0.021 |
adies | +0.021 |
oral | +0.020 |
invites | +0.020 |
Him | +0.020 |
Neg logit contributions | |
whe | -0.021 |
tug | -0.020 |
Mine | -0.020 |
Carl | -0.019 |
Laser | -0.019 |
Token | this |
Token ID | 428 |
Feature activation | +2.50e+00 |
Pos logit contributions | |
tug | +0.057 |
Carl | +0.056 |
whe | +0.054 |
Mine | +0.052 |
Andy | +0.050 |
Neg logit contributions | |
oral | -0.085 |
adies | -0.083 |
?'" | -0.082 |
decides | -0.079 |
invites | -0.079 |
Token | story |
Token ID | 1621 |
Feature activation | +1.38e+00 |
Pos logit contributions | |
tug | +0.093 |
whe | +0.091 |
Mine | +0.087 |
fought | +0.087 |
arrass | +0.086 |
Neg logit contributions | |
oral | -0.032 |
moral | -0.028 |
?'" | -0.027 |
?" | -0.026 |
Fore | -0.025 |
Token | is |
Token ID | 318 |
Feature activation | +1.62e+00 |
Pos logit contributions | |
Carl | +0.039 |
Mine | +0.039 |
whe | +0.039 |
tug | +0.039 |
fought | +0.038 |
Neg logit contributions | |
adies | -0.023 |
?'" | -0.021 |
oral | -0.021 |
Fore | -0.020 |
Him | -0.020 |
Token | that |
Token ID | 326 |
Feature activation | +5.41e-01 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.025 |
Mine | +0.023 |
fought | +0.022 |
Carl | +0.022 |
Neg logit contributions | |
oral | -0.053 |
?'" | -0.051 |
moral | -0.050 |
?" | -0.050 |
Fore | -0.049 |
Token | when |
Token ID | 618 |
Feature activation | +2.99e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.012 |
Mine | +0.011 |
Carl | +0.011 |
fought | +0.011 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.013 |
moral | -0.013 |
?" | -0.012 |
Fore | -0.012 |
Token | friend |
Token ID | 1545 |
Feature activation | -8.25e-01 |
Pos logit contributions | |
oral | +0.017 |
?'" | +0.017 |
adies | +0.016 |
Fore | +0.016 |
?" | +0.016 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.016 |
Carl | -0.016 |
Mine | -0.016 |
fought | -0.015 |
Token | . |
Token ID | 13 |
Feature activation | +5.02e-01 |
Pos logit contributions | |
oral | +0.018 |
?'" | +0.018 |
?" | +0.018 |
invites | +0.018 |
moral | +0.017 |
Neg logit contributions | |
tug | -0.020 |
whe | -0.019 |
Mine | -0.018 |
Carl | -0.018 |
oa | -0.018 |
Token |
|
Token ID | 198 |
Feature activation | +7.54e-01 |
Pos logit contributions | |
tug | +0.014 |
whe | +0.014 |
Carl | +0.014 |
Mine | +0.014 |
Kenny | +0.013 |
Neg logit contributions | |
oral | -0.010 |
adies | -0.009 |
?'" | -0.009 |
moral | -0.009 |
?" | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | +8.03e-01 |
Pos logit contributions | |
tug | +0.021 |
whe | +0.021 |
Carl | +0.020 |
Kenny | +0.019 |
Andy | +0.019 |
Neg logit contributions | |
adies | -0.016 |
decides | -0.015 |
airport | -0.015 |
Him | -0.014 |
?'" | -0.014 |
Token | After |
Token ID | 3260 |
Feature activation | +1.20e+00 |
Pos logit contributions | |
tug | +0.022 |
whe | +0.022 |
Carl | +0.021 |
Mine | +0.021 |
ci | +0.021 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.015 |
moral | -0.015 |
Fore | -0.015 |
adies | -0.014 |
Token | what |
Token ID | 644 |
Feature activation | +3.00e+00 |
Pos logit contributions | |
arrass | +0.037 |
ci | +0.037 |
whe | +0.036 |
urt | +0.035 |
Mine | +0.034 |
Neg logit contributions | |
moral | -0.019 |
oral | -0.019 |
| -0.019 |
God | -0.018 |
Fore | -0.017 |
Token | seemed |
Token ID | 3947 |
Feature activation | +2.16e+00 |
Pos logit contributions | |
whe | +0.085 |
tug | +0.082 |
Mine | +0.077 |
arrass | +0.077 |
oa | +0.076 |
Neg logit contributions | |
oral | -0.062 |
moral | -0.057 |
?" | -0.057 |
?'" | -0.057 |
invites | -0.055 |
Token | like |
Token ID | 588 |
Feature activation | +1.31e+00 |
Pos logit contributions | |
whe | +0.045 |
tug | +0.043 |
oa | +0.043 |
ci | +0.042 |
Mine | +0.042 |
Neg logit contributions | |
?'" | -0.055 |
oral | -0.055 |
?" | -0.055 |
moral | -0.054 |
adies | -0.053 |
Token | forever |
Token ID | 8097 |
Feature activation | -1.44e-01 |
Pos logit contributions | |
tug | +0.029 |
whe | +0.029 |
Mine | +0.027 |
Carl | +0.026 |
arrass | +0.026 |
Neg logit contributions | |
oral | -0.033 |
?'" | -0.031 |
moral | -0.031 |
?" | -0.031 |
adies | -0.030 |
Token | , |
Token ID | 11 |
Feature activation | +7.23e-01 |
Pos logit contributions | |
oral | +0.003 |
?" | +0.003 |
?'" | +0.003 |
| +0.003 |
invites | +0.003 |
Neg logit contributions | |
whe | -0.004 |
Mine | -0.004 |
tug | -0.004 |
Carl | -0.004 |
ci | -0.003 |
Token | the |
Token ID | 262 |
Feature activation | -1.08e+00 |
Pos logit contributions | |
whe | +0.017 |
tug | +0.017 |
Mine | +0.016 |
iled | +0.016 |
arrass | +0.015 |
Neg logit contributions | |
oral | -0.016 |
?'" | -0.016 |
moral | -0.016 |
?" | -0.016 |
invites | -0.016 |
Token | car |
Token ID | 1097 |
Feature activation | -3.55e-01 |
Pos logit contributions | |
oral | +0.049 |
?'" | +0.047 |
adies | +0.046 |
Fore | +0.045 |
moral | +0.045 |
Neg logit contributions | |
tug | -0.044 |
whe | -0.042 |
Mine | -0.040 |
Carl | -0.040 |
fought | -0.039 |
Token | , |
Token ID | 11 |
Feature activation | +5.80e-01 |
Pos logit contributions | |
oral | +0.006 |
?'" | +0.005 |
adies | +0.005 |
moral | +0.005 |
?" | +0.005 |
Neg logit contributions | |
tug | -0.011 |
whe | -0.011 |
Carl | -0.011 |
Mine | -0.011 |
fought | -0.010 |
Token | and |
Token ID | 290 |
Feature activation | -5.93e-02 |
Pos logit contributions | |
whe | +0.014 |
tug | +0.014 |
Mine | +0.013 |
Carl | +0.013 |
arrass | +0.013 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.013 |
moral | -0.013 |
?" | -0.013 |
Fore | -0.013 |
Token | the |
Token ID | 262 |
Feature activation | -1.61e+00 |
Pos logit contributions | |
oral | +0.001 |
moral | +0.001 |
?'" | +0.001 |
?" | +0.001 |
Fore | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Mine | -0.001 |
Carl | -0.001 |
arrass | -0.001 |
Token | fire |
Token ID | 2046 |
Feature activation | +1.20e+00 |
Pos logit contributions | |
oral | +0.041 |
?'" | +0.039 |
adies | +0.038 |
Fore | +0.037 |
?" | +0.037 |
Neg logit contributions | |
tug | -0.036 |
whe | -0.035 |
Mine | -0.033 |
Carl | -0.033 |
fought | -0.032 |
Token | hyd |
Token ID | 7409 |
Feature activation | +3.06e+00 |
Pos logit contributions | |
tug | +0.035 |
whe | +0.035 |
Carl | +0.034 |
Mine | +0.034 |
arrass | +0.034 |
Neg logit contributions | |
?" | -0.021 |
oral | -0.020 |
invites | -0.019 |
decides | -0.019 |
?'" | -0.019 |
Token | rant |
Token ID | 5250 |
Feature activation | -7.17e-01 |
Pos logit contributions | |
tug | +0.079 |
whe | +0.077 |
Mine | +0.073 |
fought | +0.073 |
Carl | +0.073 |
Neg logit contributions | |
oral | -0.067 |
?'" | -0.064 |
moral | -0.061 |
adies | -0.061 |
Fore | -0.061 |
Token | are |
Token ID | 389 |
Feature activation | -2.65e-01 |
Pos logit contributions | |
?" | +0.016 |
oral | +0.015 |
?'" | +0.015 |
invites | +0.015 |
decides | +0.015 |
Neg logit contributions | |
whe | -0.018 |
tug | -0.018 |
Carl | -0.017 |
Mine | -0.017 |
arrass | -0.016 |
Token | wet |
Token ID | 9583 |
Feature activation | -6.47e-02 |
Pos logit contributions | |
moral | +0.006 |
?" | +0.006 |
oral | +0.005 |
Fore | +0.005 |
?'" | +0.005 |
Neg logit contributions | |
whe | -0.007 |
ci | -0.007 |
Mine | -0.007 |
arrass | -0.007 |
Carl | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | -9.71e-01 |
Pos logit contributions | |
?" | +0.001 |
?'" | +0.001 |
oral | +0.001 |
adies | +0.001 |
moral | +0.001 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Carl | -0.001 |
Mine | -0.001 |
arrass | -0.001 |
Token | Lily |
Token ID | 20037 |
Feature activation | -1.08e+00 |
Pos logit contributions | |
oral | +0.022 |
adies | +0.020 |
?'" | +0.020 |
moral | +0.020 |
decides | +0.020 |
Neg logit contributions | |
Carl | -0.024 |
tug | -0.024 |
whe | -0.024 |
Mine | -0.024 |
Andy | -0.023 |
Token | smiled |
Token ID | 13541 |
Feature activation | -2.06e-01 |
Pos logit contributions | |
oral | +0.018 |
?'" | +0.017 |
invites | +0.017 |
?" | +0.017 |
moral | +0.017 |
Neg logit contributions | |
whe | -0.028 |
tug | -0.027 |
Mine | -0.026 |
Carl | -0.026 |
arrass | -0.026 |
Token | and |
Token ID | 290 |
Feature activation | +5.62e-01 |
Pos logit contributions | |
?'" | +0.003 |
?" | +0.003 |
moral | +0.003 |
Him | +0.003 |
oral | +0.003 |
Neg logit contributions | |
whe | -0.006 |
tug | -0.006 |
fought | -0.006 |
Carl | -0.006 |
Mine | -0.006 |
Token | thanked |
Token ID | 26280 |
Feature activation | +2.35e-01 |
Pos logit contributions | |
whe | +0.013 |
tug | +0.013 |
Mine | +0.013 |
Carl | +0.012 |
iled | +0.012 |
Neg logit contributions | |
invites | -0.012 |
?'" | -0.012 |
oral | -0.012 |
?" | -0.012 |
decides | -0.012 |
Token | the |
Token ID | 262 |
Feature activation | -4.13e-01 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.005 |
Mine | +0.005 |
Carl | +0.005 |
fought | +0.005 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.005 |
moral | -0.005 |
adies | -0.005 |
?" | -0.005 |
Token | mail |
Token ID | 6920 |
Feature activation | +5.05e-01 |
Pos logit contributions | |
oral | +0.008 |
?'" | +0.008 |
adies | +0.008 |
?" | +0.008 |
Fore | +0.008 |
Neg logit contributions | |
tug | -0.011 |
whe | -0.011 |
Mine | -0.011 |
Carl | -0.010 |
fought | -0.010 |
Token | man |
Token ID | 805 |
Feature activation | +5.79e-01 |
Pos logit contributions | |
tug | +0.016 |
disl | +0.015 |
Carl | +0.015 |
fought | +0.015 |
arrass | +0.014 |
Neg logit contributions | |
oral | -0.009 |
moral | -0.008 |
?'" | -0.008 |
invites | -0.008 |
Him | -0.008 |
Token | again |
Token ID | 757 |
Feature activation | -1.03e+00 |
Pos logit contributions | |
tug | +0.017 |
whe | +0.016 |
disl | +0.016 |
fought | +0.016 |
oa | +0.016 |
Neg logit contributions | |
oral | -0.011 |
moral | -0.010 |
invites | -0.010 |
Him | -0.010 |
?'" | -0.010 |
Token | . |
Token ID | 13 |
Feature activation | +9.25e-01 |
Pos logit contributions | |
?'" | +0.019 |
?" | +0.019 |
oral | +0.018 |
moral | +0.018 |
God | +0.017 |
Neg logit contributions | |
whe | -0.030 |
Mine | -0.029 |
tug | -0.029 |
Carl | -0.028 |
Laser | -0.028 |
Token | |
Token ID | 220 |
Feature activation | +2.94e-01 |
Pos logit contributions | |
Carl | +0.021 |
Mine | +0.021 |
Andy | +0.021 |
whe | +0.021 |
Kenny | +0.020 |
Neg logit contributions | |
oral | -0.024 |
adies | -0.023 |
moral | -0.022 |
?'" | -0.022 |
Fore | -0.021 |
Token |
|
Token ID | 198 |
Feature activation | +9.39e-01 |
Pos logit contributions | |
whe | +0.008 |
Andy | +0.007 |
Carl | +0.007 |
Mine | +0.007 |
Kenny | +0.007 |
Neg logit contributions | |
oral | -0.007 |
moral | -0.007 |
airport | -0.006 |
adies | -0.006 |
decides | -0.006 |
Token |
|
Token ID | 198 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
whe | +0.025 |
Carl | +0.023 |
tug | +0.023 |
Kenny | +0.022 |
Andy | +0.022 |
Neg logit contributions | |
adies | -0.021 |
decides | -0.021 |
airport | -0.020 |
oral | -0.020 |
narrative | -0.019 |
Token | and |
Token ID | 290 |
Feature activation | -6.72e-02 |
Pos logit contributions | |
?" | +0.004 |
?'" | +0.004 |
moral | +0.004 |
oral | +0.004 |
adies | +0.003 |
Neg logit contributions | |
tug | -0.007 |
oa | -0.007 |
whe | -0.007 |
Mine | -0.007 |
arrass | -0.007 |
Token | the |
Token ID | 262 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
oral | +0.002 |
?'" | +0.002 |
adies | +0.002 |
moral | +0.002 |
Fore | +0.002 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Token | way |
Token ID | 835 |
Feature activation | -4.37e-01 |
Pos logit contributions | |
oral | +0.024 |
?'" | +0.024 |
adies | +0.023 |
Fore | +0.022 |
?" | +0.022 |
Neg logit contributions | |
tug | -0.027 |
whe | -0.026 |
fought | -0.024 |
Carl | -0.024 |
Mine | -0.024 |
Token | it |
Token ID | 340 |
Feature activation | +5.82e-01 |
Pos logit contributions | |
oral | +0.012 |
?'" | +0.011 |
moral | +0.011 |
adies | +0.011 |
Fore | +0.011 |
Neg logit contributions | |
tug | -0.009 |
whe | -0.009 |
Carl | -0.009 |
Mine | -0.008 |
ci | -0.008 |
Token | was |
Token ID | 373 |
Feature activation | +5.78e-01 |
Pos logit contributions | |
tug | +0.014 |
whe | +0.014 |
Carl | +0.013 |
fought | +0.013 |
Mine | +0.013 |
Neg logit contributions | |
oral | -0.014 |
?'" | -0.013 |
adies | -0.013 |
moral | -0.013 |
Fore | -0.013 |
Token | singing |
Token ID | 13777 |
Feature activation | +7.08e-01 |
Pos logit contributions | |
tug | +0.012 |
whe | +0.012 |
Mine | +0.012 |
Carl | +0.011 |
fought | +0.011 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.015 |
?" | -0.014 |
moral | -0.014 |
adies | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +6.56e-01 |
Pos logit contributions | |
tug | +0.019 |
oa | +0.018 |
arrass | +0.018 |
Mine | +0.018 |
fought | +0.018 |
Neg logit contributions | |
?" | -0.013 |
?'" | -0.013 |
adies | -0.013 |
Prin | -0.013 |
moral | -0.013 |
Token | It |
Token ID | 632 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
Carl | +0.015 |
Andy | +0.015 |
whe | +0.014 |
Kenny | +0.014 |
Mine | +0.014 |
Neg logit contributions | |
oral | -0.017 |
adies | -0.017 |
moral | -0.016 |
airport | -0.016 |
embassy | -0.016 |
Token | made |
Token ID | 925 |
Feature activation | +1.00e+00 |
Pos logit contributions | |
tug | +0.027 |
whe | +0.026 |
Mine | +0.025 |
Carl | +0.025 |
fought | +0.024 |
Neg logit contributions | |
oral | -0.021 |
?'" | -0.021 |
moral | -0.020 |
adies | -0.020 |
?" | -0.020 |
Token | the |
Token ID | 262 |
Feature activation | -4.12e-01 |
Pos logit contributions | |
tug | +0.025 |
whe | +0.024 |
Carl | +0.023 |
Mine | +0.023 |
fought | +0.022 |
Neg logit contributions | |
oral | -0.023 |
?'" | -0.022 |
adies | -0.021 |
moral | -0.021 |
?" | -0.021 |
Token | day |
Token ID | 1110 |
Feature activation | -6.10e-01 |
Pos logit contributions | |
oral | +0.009 |
?'" | +0.008 |
adies | +0.008 |
?" | +0.008 |
Fore | +0.008 |
Neg logit contributions | |
tug | -0.011 |
whe | -0.011 |
Carl | -0.010 |
fought | -0.010 |
Mine | -0.010 |
Token | he |
Token ID | 339 |
Feature activation | +4.11e-01 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.006 |
Mine | +0.006 |
Carl | +0.006 |
arrass | +0.006 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
moral | -0.008 |
?" | -0.008 |
adies | -0.008 |
Token | said |
Token ID | 531 |
Feature activation | -1.09e-02 |
Pos logit contributions | |
whe | +0.011 |
tug | +0.010 |
Mine | +0.010 |
Carl | +0.010 |
arrass | +0.010 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
?" | -0.008 |
moral | -0.008 |
decides | -0.008 |
Token | he |
Token ID | 339 |
Feature activation | -3.56e-01 |
Pos logit contributions | |
oral | +0.000 |
?'" | +0.000 |
?" | +0.000 |
moral | +0.000 |
adies | +0.000 |
Neg logit contributions | |
tug | -0.000 |
whe | -0.000 |
Mine | -0.000 |
Carl | -0.000 |
ci | -0.000 |
Token | was |
Token ID | 373 |
Feature activation | +3.30e-01 |
Pos logit contributions | |
oral | +0.006 |
decides | +0.006 |
?" | +0.006 |
moral | +0.006 |
?'" | +0.006 |
Neg logit contributions | |
whe | -0.010 |
tug | -0.010 |
Carl | -0.010 |
Mine | -0.010 |
arrass | -0.010 |
Token | still |
Token ID | 991 |
Feature activation | +1.89e-01 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.007 |
Mine | +0.007 |
Carl | +0.007 |
ci | +0.007 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
?" | -0.008 |
moral | -0.008 |
adies | -0.007 |
Token | hungry |
Token ID | 14720 |
Feature activation | +6.69e-01 |
Pos logit contributions | |
whe | +0.005 |
tug | +0.004 |
oa | +0.004 |
Mine | +0.004 |
Kenny | +0.004 |
Neg logit contributions | |
?" | -0.005 |
?'" | -0.004 |
moral | -0.004 |
oral | -0.004 |
invites | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | -3.11e-02 |
Pos logit contributions | |
tug | +0.018 |
whe | +0.018 |
ci | +0.017 |
Carl | +0.017 |
Laser | +0.017 |
Neg logit contributions | |
?" | -0.013 |
?'" | -0.013 |
moral | -0.012 |
oral | -0.012 |
invites | -0.012 |
Token | When |
Token ID | 1649 |
Feature activation | -2.98e-01 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
?" | +0.001 |
moral | +0.001 |
Fore | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Carl | -0.001 |
Token | Peter |
Token ID | 5613 |
Feature activation | +1.40e+00 |
Pos logit contributions | |
moral | +0.005 |
?" | +0.005 |
God | +0.005 |
oral | +0.005 |
airport | +0.005 |
Neg logit contributions | |
iled | -0.009 |
arrass | -0.009 |
whe | -0.009 |
ked | -0.008 |
oa | -0.008 |
Token | finished |
Token ID | 5201 |
Feature activation | +1.85e-01 |
Pos logit contributions | |
whe | +0.038 |
tug | +0.038 |
Mine | +0.036 |
Carl | +0.036 |
arrass | +0.035 |
Neg logit contributions | |
oral | -0.028 |
?'" | -0.027 |
invites | -0.026 |
?" | -0.026 |
moral | -0.026 |
Token | the |
Token ID | 262 |
Feature activation | -2.41e-02 |
Pos logit contributions | |
whe | +0.005 |
tug | +0.004 |
arrass | +0.004 |
Mine | +0.004 |
ci | +0.004 |
Neg logit contributions | |
?" | -0.004 |
oral | -0.004 |
?'" | -0.004 |
moral | -0.004 |
invites | -0.004 |
Token | was |
Token ID | 373 |
Feature activation | +2.27e-01 |
Pos logit contributions | |
?" | +0.003 |
decides | +0.003 |
invites | +0.003 |
?'" | +0.003 |
moral | +0.003 |
Neg logit contributions | |
whe | -0.005 |
Mine | -0.005 |
urt | -0.005 |
tug | -0.005 |
Laser | -0.005 |
Token | surprised |
Token ID | 6655 |
Feature activation | +9.55e-01 |
Pos logit contributions | |
urt | +0.006 |
ci | +0.006 |
cream | +0.006 |
robat | +0.006 |
whe | +0.006 |
Neg logit contributions | |
?" | -0.004 |
moral | -0.004 |
invites | -0.004 |
Fore | -0.004 |
decides | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | +4.90e-01 |
Pos logit contributions | |
whe | +0.031 |
tug | +0.030 |
skate | +0.030 |
ci | +0.030 |
Mine | +0.030 |
Neg logit contributions | |
?" | -0.015 |
| -0.014 |
?'" | -0.013 |
ara | -0.012 |
Him | -0.012 |
Token | She |
Token ID | 1375 |
Feature activation | -1.40e-01 |
Pos logit contributions | |
Carl | +0.013 |
tug | +0.012 |
Mine | +0.012 |
whe | +0.011 |
Andy | +0.011 |
Neg logit contributions | |
oral | -0.011 |
adies | -0.011 |
Fore | -0.011 |
moral | -0.010 |
?'" | -0.010 |
Token | stood |
Token ID | 6204 |
Feature activation | -2.26e-01 |
Pos logit contributions | |
?" | +0.003 |
decides | +0.003 |
moral | +0.003 |
?'" | +0.003 |
invites | +0.003 |
Neg logit contributions | |
whe | -0.004 |
Mine | -0.003 |
tug | -0.003 |
urt | -0.003 |
ci | -0.003 |
Token | up |
Token ID | 510 |
Feature activation | +9.12e-01 |
Pos logit contributions | |
?" | +0.004 |
?'" | +0.004 |
moral | +0.004 |
oral | +0.004 |
adies | +0.004 |
Neg logit contributions | |
whe | -0.006 |
tug | -0.006 |
fought | -0.006 |
Mine | -0.006 |
arrass | -0.006 |
Token | and |
Token ID | 290 |
Feature activation | -4.42e-02 |
Pos logit contributions | |
arrass | +0.028 |
whe | +0.028 |
fought | +0.028 |
disl | +0.028 |
cream | +0.028 |
Neg logit contributions | |
moral | -0.013 |
?" | -0.013 |
oral | -0.012 |
?'" | -0.012 |
| -0.012 |
Token | the |
Token ID | 262 |
Feature activation | -6.90e-01 |
Pos logit contributions | |
decides | +0.001 |
?" | +0.001 |
invites | +0.001 |
asks | +0.001 |
| +0.001 |
Neg logit contributions | |
whe | -0.001 |
Mine | -0.001 |
urt | -0.001 |
ello | -0.001 |
igsaw | -0.001 |
Token | dog |
Token ID | 3290 |
Feature activation | +7.12e-01 |
Pos logit contributions | |
oral | +0.014 |
moral | +0.014 |
Fore | +0.013 |
airport | +0.013 |
invites | +0.013 |
Neg logit contributions | |
Carl | -0.019 |
whe | -0.019 |
tug | -0.018 |
ci | -0.018 |
Mine | -0.017 |
Token | ran |
Token ID | 4966 |
Feature activation | -1.46e+00 |
Pos logit contributions | |
igsaw | +0.021 |
urt | +0.020 |
ello | +0.020 |
disl | +0.020 |
whe | +0.020 |
Neg logit contributions | |
decides | -0.012 |
invites | -0.012 |
meets | -0.011 |
| -0.011 |
sees | -0.011 |
Token | away |
Token ID | 1497 |
Feature activation | -3.95e-01 |
Pos logit contributions | |
adies | +0.023 |
invites | +0.021 |
Prin | +0.021 |
| +0.021 |
decides | +0.021 |
Neg logit contributions | |
ci | -0.045 |
uc | -0.044 |
cream | -0.042 |
disl | -0.042 |
oa | -0.041 |
Token |
|
Token ID | 198 |
Feature activation | -4.76e-01 |
Pos logit contributions | |
adies | +0.007 |
decides | +0.007 |
oral | +0.007 |
airport | +0.006 |
invites | +0.006 |
Neg logit contributions | |
tug | -0.010 |
Carl | -0.010 |
whe | -0.010 |
Mine | -0.010 |
Kenny | -0.010 |
Token | The |
Token ID | 464 |
Feature activation | -1.18e+00 |
Pos logit contributions | |
oral | +0.009 |
?'" | +0.009 |
Prin | +0.008 |
Fore | +0.008 |
moral | +0.008 |
Neg logit contributions | |
tug | -0.013 |
whe | -0.013 |
Carl | -0.013 |
fought | -0.013 |
ci | -0.013 |
Token | little |
Token ID | 1310 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
oral | +0.026 |
?'" | +0.026 |
adies | +0.026 |
Fore | +0.024 |
invites | +0.024 |
Neg logit contributions | |
tug | -0.031 |
whe | -0.028 |
Mine | -0.028 |
Carl | -0.027 |
skate | -0.027 |
Token | boy |
Token ID | 2933 |
Feature activation | -5.25e-01 |
Pos logit contributions | |
oral | +0.059 |
?'" | +0.056 |
adies | +0.055 |
Fore | +0.054 |
moral | +0.053 |
Neg logit contributions | |
tug | -0.078 |
whe | -0.074 |
Mine | -0.072 |
Carl | -0.072 |
fought | -0.070 |
Token | said |
Token ID | 531 |
Feature activation | -1.25e-01 |
Pos logit contributions | |
?" | +0.011 |
oral | +0.010 |
?'" | +0.010 |
moral | +0.010 |
decides | +0.010 |
Neg logit contributions | |
whe | -0.014 |
Mine | -0.013 |
tug | -0.013 |
Carl | -0.012 |
oa | -0.012 |
Token | , |
Token ID | 11 |
Feature activation | +6.45e-01 |
Pos logit contributions | |
oral | +0.003 |
?'" | +0.003 |
?" | +0.003 |
adies | +0.003 |
moral | +0.003 |
Neg logit contributions | |
tug | -0.003 |
whe | -0.003 |
Mine | -0.003 |
Carl | -0.003 |
fought | -0.003 |
Token | " |
Token ID | 366 |
Feature activation | -6.97e-02 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.019 |
Mine | +0.018 |
Carl | +0.018 |
fought | +0.018 |
Neg logit contributions | |
oral | -0.011 |
?'" | -0.011 |
?" | -0.010 |
moral | -0.010 |
adies | -0.010 |
Token | I |
Token ID | 40 |
Feature activation | -2.09e+00 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
adies | +0.001 |
moral | +0.001 |
?" | +0.001 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Mine | -0.002 |
Carl | -0.002 |
fought | -0.002 |
Token | lost |
Token ID | 2626 |
Feature activation | -5.96e-01 |
Pos logit contributions | |
oral | +0.047 |
?'" | +0.045 |
adies | +0.044 |
moral | +0.044 |
?" | +0.043 |
Neg logit contributions | |
tug | -0.052 |
whe | -0.051 |
Carl | -0.049 |
Mine | -0.049 |
fought | -0.047 |
Token | my |
Token ID | 616 |
Feature activation | -7.20e-01 |
Pos logit contributions | |
?" | +0.012 |
?'" | +0.011 |
oral | +0.011 |
adies | +0.011 |
moral | +0.011 |
Neg logit contributions | |
whe | -0.017 |
tug | -0.017 |
fought | -0.015 |
Mine | -0.015 |
arrass | -0.015 |
Token | favorite |
Token ID | 4004 |
Feature activation | -7.05e-02 |
Pos logit contributions | |
oral | +0.019 |
moral | +0.019 |
?" | +0.018 |
?'" | +0.018 |
invites | +0.018 |
Neg logit contributions | |
whe | -0.015 |
tug | -0.015 |
Carl | -0.014 |
iled | -0.014 |
Mine | -0.013 |
Token | fountain |
Token ID | 29420 |
Feature activation | -2.02e-01 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.011 |
Mine | +0.010 |
Carl | +0.010 |
fought | +0.010 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.005 |
adies | -0.005 |
moral | -0.005 |
Fore | -0.005 |
Token | in |
Token ID | 287 |
Feature activation | -3.72e-01 |
Pos logit contributions | |
?" | +0.004 |
?'" | +0.004 |
oral | +0.004 |
moral | +0.004 |
adies | +0.004 |
Neg logit contributions | |
tug | -0.005 |
whe | -0.005 |
Carl | -0.005 |
Mine | -0.005 |
Kenny | -0.005 |
Token | the |
Token ID | 262 |
Feature activation | -5.08e-02 |
Pos logit contributions | |
oral | +0.011 |
?'" | +0.010 |
adies | +0.010 |
Him | +0.010 |
Fore | +0.010 |
Neg logit contributions | |
tug | -0.007 |
whe | -0.007 |
Carl | -0.007 |
Mine | -0.007 |
Andy | -0.006 |
Token | park |
Token ID | 3952 |
Feature activation | -1.33e+00 |
Pos logit contributions | |
oral | +0.001 |
adies | +0.001 |
Him | +0.001 |
?'" | +0.001 |
moral | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
baseball | -0.001 |
Mine | -0.001 |
Carl | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +2.55e-02 |
Pos logit contributions | |
oral | +0.034 |
?'" | +0.033 |
adies | +0.032 |
?" | +0.032 |
moral | +0.032 |
Neg logit contributions | |
tug | -0.030 |
whe | -0.028 |
Carl | -0.027 |
Mine | -0.027 |
fought | -0.026 |
Token | One |
Token ID | 1881 |
Feature activation | +1.16e-01 |
Pos logit contributions | |
Carl | +0.001 |
Mine | +0.001 |
Laser | +0.000 |
baseball | +0.000 |
Spike | +0.000 |
Neg logit contributions | |
oral | -0.001 |
adies | -0.001 |
priest | -0.001 |
Fore | -0.001 |
airport | -0.001 |
Token | day |
Token ID | 1110 |
Feature activation | +2.11e-01 |
Pos logit contributions | |
tug | +0.003 |
whe | +0.003 |
Mine | +0.003 |
fought | +0.003 |
Carl | +0.003 |
Neg logit contributions | |
oral | -0.002 |
moral | -0.002 |
?" | -0.002 |
?'" | -0.002 |
Fore | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | -2.55e-01 |
Pos logit contributions | |
tug | +0.004 |
Carl | +0.004 |
Mine | +0.003 |
whe | +0.003 |
fought | +0.003 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.006 |
airport | -0.006 |
moral | -0.006 |
invites | -0.006 |
Token | a |
Token ID | 257 |
Feature activation | -3.40e-01 |
Pos logit contributions | |
oral | +0.007 |
airport | +0.007 |
adies | +0.006 |
?'" | +0.006 |
Him | +0.006 |
Neg logit contributions | |
tug | -0.006 |
Carl | -0.006 |
Mine | -0.005 |
baseball | -0.005 |
Laser | -0.005 |
Token | stranger |
Token ID | 16195 |
Feature activation | +3.67e-01 |
Pos logit contributions | |
oral | +0.007 |
?'" | +0.006 |
adies | +0.006 |
moral | +0.006 |
?" | +0.006 |
Neg logit contributions | |
tug | -0.010 |
whe | -0.009 |
Carl | -0.009 |
Mine | -0.009 |
fought | -0.009 |
Token | approached |
Token ID | 10448 |
Feature activation | +5.50e-01 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.010 |
Carl | +0.010 |
fought | +0.010 |
Mine | +0.010 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
moral | -0.006 |
adies | -0.006 |
Fore | -0.006 |
Token | lots |
Token ID | 6041 |
Feature activation | -1.80e-01 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.006 |
Carl | +0.006 |
Mine | +0.006 |
fought | +0.006 |
Neg logit contributions | |
oral | -0.007 |
?'" | -0.007 |
adies | -0.007 |
moral | -0.007 |
?" | -0.007 |
Token | of |
Token ID | 286 |
Feature activation | +4.77e-02 |
Pos logit contributions | |
oral | +0.003 |
?'" | +0.003 |
moral | +0.003 |
adies | +0.003 |
?" | +0.003 |
Neg logit contributions | |
tug | -0.005 |
whe | -0.005 |
Carl | -0.005 |
Mine | -0.005 |
fought | -0.005 |
Token | fun |
Token ID | 1257 |
Feature activation | -9.96e-02 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
fought | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
adies | -0.001 |
moral | -0.001 |
Fore | -0.001 |
Token | for |
Token ID | 329 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
?" | +0.002 |
oral | +0.002 |
?'" | +0.002 |
invites | +0.002 |
moral | +0.002 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Carl | -0.002 |
Mine | -0.002 |
arrass | -0.002 |
Token | the |
Token ID | 262 |
Feature activation | +4.05e-01 |
Pos logit contributions | |
whe | +0.039 |
tug | +0.038 |
Mine | +0.037 |
Carl | +0.036 |
arrass | +0.036 |
Neg logit contributions | |
oral | -0.026 |
moral | -0.026 |
?" | -0.025 |
?'" | -0.025 |
Fore | -0.025 |
Token | little |
Token ID | 1310 |
Feature activation | -6.18e-02 |
Pos logit contributions | |
tug | +0.010 |
whe | +0.010 |
Carl | +0.010 |
Mine | +0.010 |
fought | +0.009 |
Neg logit contributions | |
oral | -0.009 |
moral | -0.009 |
?'" | -0.008 |
adies | -0.008 |
invites | -0.008 |
Token | girl |
Token ID | 2576 |
Feature activation | -6.27e-01 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
moral | +0.001 |
invites | +0.001 |
?" | +0.001 |
Neg logit contributions | |
tug | -0.002 |
whe | -0.002 |
Carl | -0.002 |
Mine | -0.002 |
urt | -0.002 |
Token | . |
Token ID | 13 |
Feature activation | +6.44e-01 |
Pos logit contributions | |
?'" | +0.014 |
?" | +0.014 |
oral | +0.013 |
invites | +0.013 |
moral | +0.013 |
Neg logit contributions | |
tug | -0.016 |
whe | -0.015 |
Carl | -0.014 |
Mine | -0.014 |
oa | -0.014 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +4.98e-02 |
Pos logit contributions | |
Mine | +0.017 |
Carl | +0.017 |
whe | +0.017 |
Kenny | +0.016 |
tug | +0.016 |
Neg logit contributions | |
adies | -0.014 |
oral | -0.013 |
?'" | -0.012 |
airport | -0.012 |
moral | -0.012 |
Token | Once |
Token ID | 7454 |
Feature activation | +5.71e-02 |
Pos logit contributions | |
Carl | +0.002 |
whe | +0.002 |
Mine | +0.002 |
Kenny | +0.001 |
Andy | +0.001 |
Neg logit contributions | |
oral | -0.001 |
Him | -0.001 |
adies | -0.001 |
moral | -0.001 |
decides | -0.001 |
Token | upon |
Token ID | 2402 |
Feature activation | +1.70e-03 |
Pos logit contributions | |
Carl | +0.002 |
Andy | +0.002 |
Tim | +0.002 |
Liz | +0.001 |
Kenny | +0.001 |
Neg logit contributions | |
decides | -0.001 |
adies | -0.001 |
oral | -0.001 |
Prin | -0.001 |
embassy | -0.001 |
Token | closer |
Token ID | 5699 |
Feature activation | -6.52e-01 |
Pos logit contributions | |
arrass | +0.028 |
tug | +0.028 |
Mine | +0.028 |
cream | +0.027 |
urt | +0.027 |
Neg logit contributions | |
?" | -0.021 |
moral | -0.020 |
?'" | -0.020 |
decides | -0.018 |
invites | -0.018 |
Token | , |
Token ID | 11 |
Feature activation | +2.77e-02 |
Pos logit contributions | |
oral | +0.018 |
?'" | +0.017 |
adies | +0.017 |
moral | +0.017 |
Fore | +0.017 |
Neg logit contributions | |
tug | -0.013 |
whe | -0.012 |
Carl | -0.012 |
Mine | -0.012 |
fought | -0.011 |
Token | so |
Token ID | 523 |
Feature activation | -8.22e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Mine | +0.001 |
Carl | +0.001 |
urt | +0.000 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
moral | -0.001 |
?" | -0.001 |
adies | -0.001 |
Token | they |
Token ID | 484 |
Feature activation | -1.43e+00 |
Pos logit contributions | |
oral | +0.017 |
?'" | +0.016 |
moral | +0.016 |
?" | +0.016 |
adies | +0.016 |
Neg logit contributions | |
tug | -0.022 |
whe | -0.022 |
Mine | -0.021 |
Carl | -0.020 |
arrass | -0.020 |
Token | climbed |
Token ID | 19952 |
Feature activation | -1.59e+00 |
Pos logit contributions | |
oral | +0.035 |
?'" | +0.034 |
adies | +0.033 |
moral | +0.033 |
Fore | +0.032 |
Neg logit contributions | |
tug | -0.033 |
whe | -0.032 |
Carl | -0.031 |
Mine | -0.030 |
fought | -0.030 |
Token | on |
Token ID | 319 |
Feature activation | +3.70e-01 |
Pos logit contributions | |
oral | +0.045 |
?'" | +0.043 |
adies | +0.041 |
moral | +0.041 |
invites | +0.041 |
Neg logit contributions | |
tug | -0.032 |
whe | -0.031 |
Carl | -0.030 |
Mine | -0.029 |
fought | -0.028 |
Token | the |
Token ID | 262 |
Feature activation | +1.79e-01 |
Pos logit contributions | |
whe | +0.011 |
fought | +0.011 |
arrass | +0.011 |
tug | +0.011 |
iled | +0.011 |
Neg logit contributions | |
moral | -0.006 |
Fore | -0.006 |
oral | -0.006 |
| -0.006 |
airport | -0.005 |
Token | fence |
Token ID | 13990 |
Feature activation | +1.75e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.004 |
Carl | +0.004 |
Mine | +0.004 |
fought | +0.004 |
Neg logit contributions | |
oral | -0.004 |
?'" | -0.004 |
moral | -0.004 |
adies | -0.004 |
?" | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | -3.86e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.004 |
Mine | +0.004 |
Carl | +0.004 |
arrass | +0.004 |
Neg logit contributions | |
oral | -0.004 |
?'" | -0.004 |
?" | -0.003 |
adies | -0.003 |
moral | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | -5.29e-01 |
Pos logit contributions | |
oral | +0.009 |
?'" | +0.009 |
adies | +0.009 |
moral | +0.008 |
Fore | +0.008 |
Neg logit contributions | |
tug | -0.010 |
Mine | -0.009 |
Carl | -0.009 |
whe | -0.009 |
Kenny | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | -6.76e-01 |
Pos logit contributions | |
adies | +0.015 |
airport | +0.015 |
decides | +0.015 |
narrative | +0.015 |
Him | +0.015 |
Neg logit contributions | |
tug | -0.010 |
Carl | -0.010 |
Andy | -0.010 |
Kenny | -0.009 |
Tim | -0.009 |
Token | thankful |
Token ID | 25535 |
Feature activation | +1.32e-01 |
Pos logit contributions | |
tug | +0.020 |
whe | +0.020 |
Mine | +0.019 |
Carl | +0.019 |
arrass | +0.018 |
Neg logit contributions | |
oral | -0.020 |
?'" | -0.019 |
moral | -0.019 |
?" | -0.018 |
Fore | -0.018 |
Token | to |
Token ID | 284 |
Feature activation | +6.15e-01 |
Pos logit contributions | |
whe | +0.004 |
tug | +0.004 |
Mine | +0.003 |
fought | +0.003 |
oa | +0.003 |
Neg logit contributions | |
?" | -0.003 |
oral | -0.003 |
moral | -0.003 |
?'" | -0.003 |
Fore | -0.002 |
Token | the |
Token ID | 262 |
Feature activation | -6.46e-01 |
Pos logit contributions | |
tug | +0.012 |
Carl | +0.012 |
whe | +0.011 |
Andy | +0.011 |
fought | +0.011 |
Neg logit contributions | |
oral | -0.018 |
adies | -0.017 |
?'" | -0.017 |
Fore | -0.016 |
Prin | -0.016 |
Token | puppy |
Token ID | 26188 |
Feature activation | +3.04e-01 |
Pos logit contributions | |
moral | +0.017 |
oral | +0.016 |
invites | +0.015 |
decides | +0.015 |
Fore | +0.014 |
Neg logit contributions | |
iled | -0.015 |
whe | -0.015 |
urt | -0.014 |
fought | -0.014 |
tug | -0.014 |
Token | for |
Token ID | 329 |
Feature activation | +1.28e+00 |
Pos logit contributions | |
whe | +0.010 |
tug | +0.009 |
oa | +0.009 |
disl | +0.009 |
Mine | +0.009 |
Neg logit contributions | |
moral | -0.005 |
invites | -0.004 |
?'" | -0.004 |
?" | -0.004 |
oral | -0.004 |
Token | understanding |
Token ID | 4547 |
Feature activation | +2.65e-02 |
Pos logit contributions | |
whe | +0.029 |
ci | +0.029 |
urt | +0.029 |
Mine | +0.028 |
iled | +0.028 |
Neg logit contributions | |
moral | -0.032 |
God | -0.030 |
Fore | -0.029 |
oral | -0.029 |
Him | -0.028 |
Token | him |
Token ID | 683 |
Feature activation | +7.86e-01 |
Pos logit contributions | |
whe | +0.001 |
tug | +0.001 |
ci | +0.001 |
Mine | +0.001 |
Carl | +0.001 |
Neg logit contributions | |
oral | -0.001 |
moral | -0.001 |
?" | -0.001 |
?'" | -0.001 |
decides | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | +2.24e-01 |
Pos logit contributions | |
whe | +0.025 |
tug | +0.024 |
fought | +0.023 |
Mine | +0.023 |
oa | +0.023 |
Neg logit contributions | |
moral | -0.013 |
oral | -0.012 |
?" | -0.012 |
?'" | -0.011 |
Him | -0.011 |
Token | making |
Token ID | 1642 |
Feature activation | +1.14e+00 |
Pos logit contributions | |
whe | +0.005 |
tug | +0.005 |
Mine | +0.005 |
iled | +0.005 |
urt | +0.005 |
Neg logit contributions | |
oral | -0.005 |
moral | -0.005 |
invites | -0.005 |
?'" | -0.005 |
?" | -0.005 |
Token | him |
Token ID | 683 |
Feature activation | +1.53e-01 |
Pos logit contributions | |
whe | +0.027 |
tug | +0.027 |
iled | +0.026 |
fought | +0.026 |
ci | +0.026 |
Neg logit contributions | |
invites | -0.025 |
moral | -0.024 |
oral | -0.024 |
Him | -0.023 |
Fore | -0.023 |
Token | feel |
Token ID | 1254 |
Feature activation | -4.25e-01 |
Pos logit contributions | |
oa | +0.005 |
tug | +0.005 |
Mine | +0.005 |
whe | +0.005 |
Carl | +0.005 |
Neg logit contributions | |
moral | -0.002 |
oral | -0.002 |
?" | -0.002 |
invites | -0.002 |
?'" | -0.002 |
Token | she |
Token ID | 673 |
Feature activation | -7.83e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.013 |
Mine | +0.012 |
Carl | +0.012 |
fought | +0.012 |
Neg logit contributions | |
?'" | -0.011 |
oral | -0.011 |
?" | -0.011 |
moral | -0.011 |
invites | -0.010 |
Token | ran |
Token ID | 4966 |
Feature activation | -8.06e-01 |
Pos logit contributions | |
oral | +0.023 |
?'" | +0.021 |
adies | +0.021 |
moral | +0.021 |
Fore | +0.021 |
Neg logit contributions | |
tug | -0.017 |
whe | -0.015 |
fought | -0.014 |
Carl | -0.014 |
Mine | -0.014 |
Token | to |
Token ID | 284 |
Feature activation | -5.50e-01 |
Pos logit contributions | |
?" | +0.014 |
| +0.012 |
invites | +0.012 |
adies | +0.012 |
?'" | +0.012 |
Neg logit contributions | |
fought | -0.023 |
disl | -0.022 |
ci | -0.022 |
uc | -0.022 |
oa | -0.022 |
Token | the |
Token ID | 262 |
Feature activation | +1.59e-01 |
Pos logit contributions | |
oral | +0.015 |
?'" | +0.014 |
adies | +0.014 |
Fore | +0.014 |
moral | +0.014 |
Neg logit contributions | |
tug | -0.013 |
whe | -0.012 |
Carl | -0.011 |
Mine | -0.011 |
skate | -0.011 |
Token | door |
Token ID | 3420 |
Feature activation | -9.22e-01 |
Pos logit contributions | |
tug | +0.004 |
whe | +0.004 |
Mine | +0.004 |
Carl | +0.004 |
fought | +0.004 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
adies | -0.003 |
moral | -0.003 |
Fore | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | -3.32e-01 |
Pos logit contributions | |
oral | +0.018 |
?" | +0.018 |
?'" | +0.018 |
moral | +0.017 |
adies | +0.017 |
Neg logit contributions | |
tug | -0.024 |
Carl | -0.024 |
Mine | -0.023 |
whe | -0.023 |
fought | -0.023 |
Token | waited |
Token ID | 13488 |
Feature activation | -1.57e+00 |
Pos logit contributions | |
?" | +0.007 |
invites | +0.007 |
?'" | +0.007 |
decides | +0.006 |
| +0.006 |
Neg logit contributions | |
whe | -0.008 |
Mine | -0.008 |
tug | -0.008 |
Carl | -0.008 |
iled | -0.008 |
Token | for |
Token ID | 329 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
oral | +0.043 |
?'" | +0.040 |
Fore | +0.039 |
moral | +0.039 |
adies | +0.039 |
Neg logit contributions | |
tug | -0.033 |
whe | -0.032 |
Carl | -0.030 |
Mine | -0.030 |
fought | -0.028 |
Token | her |
Token ID | 607 |
Feature activation | +2.21e-01 |
Pos logit contributions | |
whe | +0.003 |
tug | +0.003 |
ci | +0.003 |
iled | +0.003 |
fought | +0.003 |
Neg logit contributions | |
adies | -0.002 |
oral | -0.002 |
?'" | -0.002 |
?" | -0.002 |
moral | -0.002 |
Token | mum |
Token ID | 25682 |
Feature activation | +3.47e-01 |
Pos logit contributions | |
whe | +0.007 |
iled | +0.006 |
arrass | +0.006 |
tug | +0.006 |
Carl | +0.006 |
Neg logit contributions | |
?" | -0.004 |
moral | -0.004 |
oral | -0.003 |
?'" | -0.003 |
invites | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | +5.43e-01 |
Pos logit contributions | |
tug | +0.008 |
whe | +0.008 |
Carl | +0.007 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
?" | -0.008 |
adies | -0.008 |
moral | -0.008 |
Token | . |
Token ID | 13 |
Feature activation | +7.22e-01 |
Pos logit contributions | |
?" | +0.001 |
?'" | +0.001 |
| +0.001 |
oral | +0.001 |
moral | +0.001 |
Neg logit contributions | |
tug | -0.004 |
subtract | -0.003 |
whe | -0.003 |
Mine | -0.003 |
arrass | -0.003 |
Token | Every |
Token ID | 3887 |
Feature activation | +6.07e-01 |
Pos logit contributions | |
tug | +0.022 |
whe | +0.020 |
ci | +0.020 |
fought | +0.019 |
iled | +0.019 |
Neg logit contributions | |
?" | -0.013 |
?'" | -0.013 |
| -0.013 |
adies | -0.012 |
invites | -0.012 |
Token | day |
Token ID | 1110 |
Feature activation | -4.56e-01 |
Pos logit contributions | |
tug | +0.014 |
whe | +0.013 |
Carl | +0.013 |
Mine | +0.013 |
skate | +0.013 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.014 |
adies | -0.014 |
decides | -0.013 |
invites | -0.013 |
Token | , |
Token ID | 11 |
Feature activation | -3.18e-01 |
Pos logit contributions | |
oral | +0.010 |
?" | +0.010 |
?'" | +0.010 |
adies | +0.010 |
moral | +0.010 |
Neg logit contributions | |
tug | -0.011 |
whe | -0.011 |
Carl | -0.010 |
Mine | -0.010 |
ci | -0.010 |
Token | he |
Token ID | 339 |
Feature activation | -8.24e-01 |
Pos logit contributions | |
oral | +0.009 |
?'" | +0.009 |
adies | +0.008 |
Fore | +0.008 |
moral | +0.008 |
Neg logit contributions | |
tug | -0.006 |
whe | -0.006 |
Carl | -0.006 |
Mine | -0.006 |
fought | -0.006 |
Token | would |
Token ID | 561 |
Feature activation | -1.89e+00 |
Pos logit contributions | |
oral | +0.019 |
adies | +0.017 |
?'" | +0.017 |
Fore | +0.016 |
Him | +0.016 |
Neg logit contributions | |
tug | -0.022 |
fought | -0.022 |
skate | -0.020 |
whe | -0.020 |
ci | -0.019 |
Token | stare |
Token ID | 24170 |
Feature activation | -1.14e+00 |
Pos logit contributions | |
oral | +0.052 |
?'" | +0.049 |
adies | +0.048 |
Fore | +0.048 |
moral | +0.047 |
Neg logit contributions | |
tug | -0.041 |
whe | -0.038 |
Carl | -0.036 |
Mine | -0.036 |
fought | -0.035 |
Token | long |
Token ID | 890 |
Feature activation | +8.43e-01 |
Pos logit contributions | |
oral | +0.037 |
Fore | +0.034 |
?'" | +0.034 |
adies | +0.033 |
moral | +0.033 |
Neg logit contributions | |
tug | -0.019 |
whe | -0.016 |
Carl | -0.016 |
Mine | -0.016 |
urt | -0.015 |
Token | ingly |
Token ID | 4420 |
Feature activation | -1.76e-01 |
Pos logit contributions | |
whe | +0.024 |
tug | +0.023 |
Carl | +0.023 |
Mine | +0.022 |
arrass | +0.022 |
Neg logit contributions | |
?" | -0.016 |
?'" | -0.015 |
adies | -0.015 |
moral | -0.015 |
invites | -0.015 |
Token | at |
Token ID | 379 |
Feature activation | +1.16e+00 |
Pos logit contributions | |
?'" | +0.004 |
?" | +0.004 |
oral | +0.004 |
adies | +0.004 |
moral | +0.004 |
Neg logit contributions | |
whe | -0.004 |
tug | -0.004 |
Carl | -0.004 |
Mine | -0.004 |
fought | -0.004 |
Token | the |
Token ID | 262 |
Feature activation | -4.39e-01 |
Pos logit contributions | |
tug | +0.021 |
Carl | +0.019 |
Mine | +0.018 |
Andy | +0.018 |
whe | +0.018 |
Neg logit contributions | |
oral | -0.037 |
?'" | -0.034 |
adies | -0.034 |
Fore | -0.033 |
Prin | -0.033 |
Token | it |
Token ID | 340 |
Feature activation | +2.34e-01 |
Pos logit contributions | |
tug | +0.015 |
whe | +0.015 |
fought | +0.014 |
Carl | +0.014 |
Mine | +0.014 |
Neg logit contributions | |
oral | -0.014 |
?'" | -0.014 |
?" | -0.014 |
moral | -0.014 |
Fore | -0.013 |
Token | with |
Token ID | 351 |
Feature activation | -5.23e-01 |
Pos logit contributions | |
tug | +0.005 |
whe | +0.005 |
Carl | +0.005 |
Mine | +0.005 |
fought | +0.005 |
Neg logit contributions | |
oral | -0.006 |
?'" | -0.006 |
adies | -0.006 |
Fore | -0.006 |
moral | -0.006 |
Token | coins |
Token ID | 10796 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
oral | +0.013 |
?'" | +0.012 |
adies | +0.012 |
moral | +0.012 |
Fore | +0.012 |
Neg logit contributions | |
tug | -0.012 |
whe | -0.011 |
Mine | -0.011 |
Carl | -0.011 |
fought | -0.011 |
Token | ?". |
Token ID | 43634 |
Feature activation | -4.40e-02 |
Pos logit contributions | |
oral | +0.024 |
?'" | +0.023 |
adies | +0.022 |
moral | +0.022 |
?" | +0.022 |
Neg logit contributions | |
tug | -0.026 |
whe | -0.025 |
Carl | -0.024 |
Mine | -0.024 |
fought | -0.023 |
Token | The |
Token ID | 383 |
Feature activation | -9.38e-01 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
adies | +0.001 |
moral | +0.001 |
?" | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Token | little |
Token ID | 1310 |
Feature activation | -2.12e+00 |
Pos logit contributions | |
oral | +0.019 |
?'" | +0.019 |
adies | +0.018 |
Fore | +0.017 |
?" | +0.017 |
Neg logit contributions | |
tug | -0.027 |
whe | -0.025 |
Carl | -0.024 |
Mine | -0.024 |
fought | -0.024 |
Token | boy |
Token ID | 2933 |
Feature activation | -7.16e-01 |
Pos logit contributions | |
oral | +0.029 |
?" | +0.029 |
moral | +0.028 |
?'" | +0.028 |
invites | +0.028 |
Neg logit contributions | |
whe | -0.070 |
tug | -0.067 |
Mine | -0.065 |
oa | -0.065 |
Carl | -0.065 |
Token | was |
Token ID | 373 |
Feature activation | +3.36e-02 |
Pos logit contributions | |
moral | +0.013 |
oral | +0.012 |
?" | +0.012 |
decides | +0.012 |
?'" | +0.012 |
Neg logit contributions | |
whe | -0.021 |
Laser | -0.020 |
Mine | -0.019 |
Carl | -0.019 |
robat | -0.019 |
Token | so |
Token ID | 523 |
Feature activation | +5.02e-01 |
Pos logit contributions | |
tug | +0.001 |
whe | +0.001 |
Carl | +0.001 |
Mine | +0.001 |
urt | +0.001 |
Neg logit contributions | |
oral | -0.001 |
?'" | -0.001 |
moral | -0.001 |
?" | -0.001 |
Fore | -0.001 |
Token | excited |
Token ID | 6568 |
Feature activation | +4.27e-01 |
Pos logit contributions | |
tug | +0.011 |
whe | +0.011 |
Carl | +0.010 |
Mine | +0.010 |
ci | +0.010 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.013 |
moral | -0.012 |
?" | -0.012 |
Fore | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | -5.74e-01 |
Pos logit contributions | |
tug | +0.012 |
whe | +0.012 |
Mine | +0.012 |
ci | +0.012 |
oa | +0.011 |
Neg logit contributions | |
?" | -0.008 |
| -0.007 |
?'" | -0.007 |
oral | -0.006 |
invites | -0.006 |
Token | , |
Token ID | 11 |
Feature activation | +5.37e-01 |
Pos logit contributions | |
tug | +0.018 |
Carl | +0.016 |
whe | +0.016 |
Mine | +0.016 |
fought | +0.015 |
Neg logit contributions | |
oral | -0.028 |
?'" | -0.027 |
moral | -0.026 |
Fore | -0.026 |
invites | -0.026 |
Token | a |
Token ID | 257 |
Feature activation | +2.46e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.012 |
Carl | +0.012 |
Mine | +0.012 |
fought | +0.011 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.012 |
adies | -0.012 |
?" | -0.012 |
moral | -0.012 |
Token | little |
Token ID | 1310 |
Feature activation | -2.56e-01 |
Pos logit contributions | |
tug | +0.007 |
whe | +0.007 |
Mine | +0.006 |
Carl | +0.006 |
fought | +0.006 |
Neg logit contributions | |
oral | -0.005 |
?'" | -0.005 |
adies | -0.005 |
moral | -0.004 |
invites | -0.004 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.47e+00 |
Pos logit contributions | |
oral | +0.004 |
?'" | +0.004 |
?" | +0.004 |
moral | +0.004 |
adies | +0.004 |
Neg logit contributions | |
tug | -0.008 |
whe | -0.008 |
Carl | -0.008 |
Mine | -0.008 |
fought | -0.008 |
Token | named |
Token ID | 3706 |
Feature activation | -5.04e-01 |
Pos logit contributions | |
oral | +0.042 |
?'" | +0.038 |
moral | +0.038 |
Him | +0.037 |
adies | +0.037 |
Neg logit contributions | |
tug | -0.032 |
fought | -0.029 |
iled | -0.029 |
Carl | -0.028 |
ci | -0.027 |
Token | Sus |
Token ID | 8932 |
Feature activation | -1.88e+00 |
Pos logit contributions | |
oral | +0.015 |
?'" | +0.014 |
moral | +0.014 |
airport | +0.013 |
Fore | +0.013 |
Neg logit contributions | |
tug | -0.010 |
Carl | -0.009 |
whe | -0.009 |
Mine | -0.009 |
Andy | -0.009 |
Token | ie |
Token ID | 494 |
Feature activation | -1.19e+00 |
Pos logit contributions | |
oral | +0.065 |
moral | +0.059 |
vet | +0.057 |
meantime | +0.057 |
embassy | +0.056 |
Neg logit contributions | |
ci | -0.030 |
fought | -0.026 |
cube | -0.024 |
iled | -0.024 |
organized | -0.023 |
Token | was |
Token ID | 373 |
Feature activation | +4.08e-01 |
Pos logit contributions | |
oral | +0.032 |
?'" | +0.028 |
moral | +0.028 |
adies | +0.027 |
Him | +0.027 |
Neg logit contributions | |
tug | -0.030 |
fought | -0.028 |
iled | -0.026 |
ci | -0.026 |
Carl | -0.026 |
Token | walking |
Token ID | 6155 |
Feature activation | -4.81e-02 |
Pos logit contributions | |
tug | +0.006 |
whe | +0.006 |
Mine | +0.005 |
Carl | +0.005 |
fought | +0.005 |
Neg logit contributions | |
oral | -0.015 |
?'" | -0.014 |
adies | -0.013 |
decides | -0.013 |
moral | -0.013 |
Token | in |
Token ID | 287 |
Feature activation | -1.39e-02 |
Pos logit contributions | |
?" | +0.001 |
| +0.001 |
moral | +0.001 |
invites | +0.001 |
adies | +0.001 |
Neg logit contributions | |
oa | -0.001 |
tug | -0.001 |
Mine | -0.001 |
urt | -0.001 |
arrass | -0.001 |
Token | the |
Token ID | 262 |
Feature activation | +3.99e-01 |
Pos logit contributions | |
oral | +0.000 |
?" | +0.000 |
?'" | +0.000 |
| +0.000 |
moral | +0.000 |
Neg logit contributions | |
whe | -0.000 |
tug | -0.000 |
Mine | -0.000 |
urt | -0.000 |
Carl | -0.000 |
Token | the |
Token ID | 262 |
Feature activation | -1.99e+00 |
Pos logit contributions | |
moral | +0.010 |
oral | +0.010 |
?" | +0.010 |
?'" | +0.010 |
Fore | +0.010 |
Neg logit contributions | |
whe | -0.013 |
tug | -0.013 |
arrass | -0.012 |
Mine | -0.012 |
iled | -0.012 |
Token | truck |
Token ID | 7779 |
Feature activation | +8.82e-01 |
Pos logit contributions | |
oral | +0.046 |
?'" | +0.044 |
adies | +0.043 |
moral | +0.043 |
Fore | +0.042 |
Neg logit contributions | |
tug | -0.049 |
whe | -0.047 |
Carl | -0.045 |
Mine | -0.045 |
fought | -0.044 |
Token | was |
Token ID | 373 |
Feature activation | -3.40e-01 |
Pos logit contributions | |
disl | +0.027 |
Carl | +0.027 |
whe | +0.027 |
tug | +0.026 |
latable | +0.026 |
Neg logit contributions | |
moral | -0.012 |
oral | -0.012 |
?" | -0.012 |
decides | -0.011 |
airport | -0.010 |
Token | flexible |
Token ID | 12846 |
Feature activation | +3.89e-01 |
Pos logit contributions | |
moral | +0.008 |
?" | +0.008 |
?'" | +0.007 |
oral | +0.007 |
Fore | +0.007 |
Neg logit contributions | |
whe | -0.008 |
ci | -0.008 |
oa | -0.008 |
Mine | -0.008 |
Carl | -0.008 |
Token | and |
Token ID | 290 |
Feature activation | -3.49e-01 |
Pos logit contributions | |
whe | +0.011 |
oa | +0.011 |
tug | +0.011 |
arrass | +0.011 |
Carl | +0.011 |
Neg logit contributions | |
?" | -0.007 |
moral | -0.006 |
?'" | -0.006 |
decides | -0.006 |
invites | -0.006 |
Token | could |
Token ID | 714 |
Feature activation | -2.11e+00 |
Pos logit contributions | |
moral | +0.008 |
?" | +0.007 |
invites | +0.007 |
decides | +0.007 |
Fore | +0.007 |
Neg logit contributions | |
whe | -0.008 |
tug | -0.008 |
iled | -0.007 |
Carl | -0.007 |
urt | -0.007 |
Token | handle |
Token ID | 5412 |
Feature activation | -3.08e-02 |
Pos logit contributions | |
oral | +0.057 |
?'" | +0.056 |
moral | +0.054 |
?" | +0.054 |
adies | +0.054 |
Neg logit contributions | |
tug | -0.042 |
whe | -0.041 |
Carl | -0.039 |
Mine | -0.039 |
fought | -0.037 |
Token | it |
Token ID | 340 |
Feature activation | +5.82e-01 |
Pos logit contributions | |
oral | +0.001 |
?'" | +0.001 |
?" | +0.001 |
adies | +0.001 |
moral | +0.001 |
Neg logit contributions | |
tug | -0.001 |
whe | -0.001 |
Carl | -0.001 |
Mine | -0.001 |
fought | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -3.73e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.013 |
Carl | +0.012 |
Mine | +0.012 |
fought | +0.012 |
Neg logit contributions | |
oral | -0.014 |
?'" | -0.014 |
?" | -0.014 |
moral | -0.013 |
adies | -0.013 |
Token | |
Token ID | 220 |
Feature activation | -4.99e-01 |
Pos logit contributions | |
oral | +0.009 |
adies | +0.008 |
?'" | +0.008 |
moral | +0.008 |
Fore | +0.008 |
Neg logit contributions | |
tug | -0.009 |
whe | -0.009 |
Carl | -0.009 |
Mine | -0.009 |
Kenny | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | -3.32e-01 |
Pos logit contributions | |
adies | +0.013 |
embassy | +0.013 |
airport | +0.013 |
decides | +0.012 |
bride | +0.012 |
Neg logit contributions | |
Andy | -0.011 |
whe | -0.011 |
Carl | -0.011 |
Kenny | -0.011 |
Mine | -0.010 |
Token | my |
Token ID | 1820 |
Feature activation | +1.36e+00 |
Pos logit contributions | |
tug | +0.045 |
Mine | +0.043 |
whe | +0.042 |
resses | +0.041 |
Carl | +0.041 |
Neg logit contributions | |
decides | -0.010 |
oral | -0.009 |
moral | -0.008 |
?'" | -0.008 |
Fore | -0.008 |
Token | visited |
Token ID | 8672 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
Mine | +0.044 |
igsaw | +0.044 |
Carl | +0.043 |
tug | +0.042 |
resses | +0.042 |
Neg logit contributions | |
decides | -0.023 |
moral | -0.019 |
oral | -0.018 |
would | -0.017 |
?" | -0.016 |
Token | the |
Token ID | 262 |
Feature activation | +5.34e-01 |
Pos logit contributions | |
tug | +0.026 |
whe | +0.025 |
Carl | +0.024 |
Mine | +0.023 |
fought | +0.023 |
Neg logit contributions | |
oral | -0.034 |
?'" | -0.033 |
adies | -0.032 |
moral | -0.031 |
Fore | -0.031 |
Token | park |
Token ID | 3952 |
Feature activation | -9.37e-01 |
Pos logit contributions | |
tug | +0.013 |
whe | +0.013 |
Carl | +0.012 |
Mine | +0.012 |
fought | +0.012 |
Neg logit contributions | |
oral | -0.013 |
?'" | -0.012 |
adies | -0.012 |
moral | -0.012 |
Fore | -0.012 |
Token | every |
Token ID | 790 |
Feature activation | +1.23e+00 |
Pos logit contributions | |
oral | +0.024 |
?'" | +0.022 |
moral | +0.022 |
invites | +0.022 |
adies | +0.022 |
Neg logit contributions | |
tug | -0.022 |
whe | -0.020 |
Mine | -0.020 |
Carl | -0.019 |
arrass | -0.018 |
Token | day |
Token ID | 1110 |
Feature activation | -1.90e+00 |
Pos logit contributions | |
tug | +0.023 |
fought | +0.020 |
Carl | +0.019 |
baseball | +0.018 |
Andy | +0.018 |
Neg logit contributions | |
?'" | -0.036 |
adies | -0.036 |
oral | -0.036 |
Him | -0.035 |
Prin | -0.034 |
Token | to |
Token ID | 284 |
Feature activation | +3.27e-01 |
Pos logit contributions | |
?'" | +0.032 |
Him | +0.032 |
invites | +0.031 |
adies | +0.031 |
?" | +0.031 |
Neg logit contributions | |
tug | -0.052 |
arrass | -0.051 |
cream | -0.051 |
Kenny | -0.051 |
whe | -0.051 |
Token | see |
Token ID | 766 |
Feature activation | -1.49e+00 |
Pos logit contributions | |
tug | +0.007 |
whe | +0.007 |
Carl | +0.007 |
Mine | +0.007 |
fought | +0.007 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
adies | -0.008 |
Fore | -0.008 |
moral | -0.008 |
Token | if |
Token ID | 611 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
oral | +0.040 |
?'" | +0.038 |
adies | +0.038 |
moral | +0.037 |
Fore | +0.036 |
Neg logit contributions | |
tug | -0.032 |
whe | -0.030 |
Carl | -0.029 |
Mine | -0.029 |
fought | -0.028 |
Token | they |
Token ID | 484 |
Feature activation | +3.84e-01 |
Pos logit contributions | |
tug | +0.003 |
whe | +0.002 |
Carl | +0.002 |
Mine | +0.002 |
fought | +0.002 |
Neg logit contributions | |
oral | -0.003 |
?'" | -0.003 |
adies | -0.002 |
moral | -0.002 |
?" | -0.002 |
Token | could |
Token ID | 714 |
Feature activation | -6.68e-01 |
Pos logit contributions | |
tug | +0.010 |
whe | +0.010 |
Mine | +0.009 |
Carl | +0.009 |
arrass | +0.009 |
Neg logit contributions | |
oral | -0.008 |
?'" | -0.008 |
moral | -0.008 |
?" | -0.008 |
adies | -0.008 |