HNOI2019白兔之舞 单位根反演

[HNOI2019]白兔之舞 这个套路那个套路 叠加起来就是毒瘤题 各种奇怪东西加在一起的题。 考虑钦定走了$m$步的方案数。令$f_m$为连着走了$m$步的方案: $$\sum_{i=0}^L \binom{i-1}{L-1} f_m = \binom{L}{m} f_m $$ 显然$f$k可以矩乘优化求。令$S$为初始矩阵,$M$为转移矩阵: $$f_i = (SM^i)_y$$ $$ans_t = \sum_{i=0}^L [i \mod K = t] \binom{L}{i} f_i$$ 看到整除根据套路单位根反演: $$ans_t = \sum_{i=0}^L \frac{1}{K} \sum_{j < K} \omega_{K}^{j(i-t)} \binom{L}{i} f_i$$ 再套路的化简一波: $$=\frac{1}{K} \sum_{j < K } \omega_{K}^{-jt} (S \sum_{i=0}^L \binom{L}{i} M^i \omega_{K}^{ji} )_y$$ 后面第二个$\sum$里的东西可以二项式定理 $$=\frac{1}{K} \sum_{j < K } \omega_{K}^{-jt} (S (M\omega_{K}^j+I)^L)_y$$ 显然可以倍增随便求 $$=\frac{1}{K} \sum_{j < K } \omega_{K}^{-jt} V_j$$ 因为模数不是质数,考虑Bluestein’s Algorithm。 $$=\frac{1}{K} \sum_{j < K } \omega_{K}^{\binom{j}{2} + \binom{t}{2} - \binom{j+t}{2}} V_j$$ $$=\frac{1}{K} \omega_{K}^{\binom{t}{2}} \sum_{j < K } \omega_{K}^{\binom{j}{2}} V_j \omega_{K}^{-\binom{j+k}{2}} $$ 那么后面这个显然是卷积,卷一卷就可以做了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#include <bits/stdc++.h>

using std::cerr;
using std::endl;

const double PI = std::acos(-1);

struct comp {
double x, y;
};

inline comp operator+(const comp &a, const comp &b) {
return { a.x + b.x, a.y + b.y };
}

inline comp operator-(const comp &a, const comp &b) {
return { a.x - b.x, a.y - b.y };
}

inline comp operator*(const comp &a, const comp &b) {
return { a.x * b.x - a.y * b.y, a.y * b.x + a.x * b.y };
}

inline comp operator/(const comp &a, double b) {
return { a.x / b, a.y / b };
}

inline comp conj(const comp &a) {
return { a.x, -a.y };
}

const int N = 3e5 + 233;

int lim, rev[N];
comp w1[N], w2[N];

inline void init(int n) {
lim = 1;
while (lim < n) lim <<= 1;
for (int i = 0; i < lim; ++i) {
rev[i] = (rev[i >> 1] >> 1) ((i & 1) ? (lim >> 1) : 0);
w1[i] = { std::cos(PI * 2 * i / lim), std::sin(PI * 2 * i / lim) };
w2[i] = conj(w1[i]);
}
}

inline void DFT(comp f[], int o) {
for (int i = 0; i < lim; ++i)
if (i < rev[i]) std::swap(f[i], f[rev[i]]);
for (int i = 1; i < lim; i <<= 1) {
for (int j = 0; j < lim; j += i << 1) {
comp *p = o == 1 ? w1 : w2;
for (int k = 0; k < i; ++k, p += lim / i / 2) {
comp nx = f[j + k], ny = f[i + j + k] * *p;
f[j + k] = nx + ny;
f[i + j + k] = nx - ny;
}
}
}
if (o == -1)
for (int i = 0; i < lim; ++i)
f[i] = f[i] / lim;
}

int P;

inline void MTT(long long F[], long long G[], long long R[]) {
static comp A[N], B[N]; int M = (1 << 15) - 1;
for (int i = 0; i < lim; ++i) {
A[i] = { F[i] & M, F[i] >> 15 };
B[i] = { G[i] & M, G[i] >> 15 };
}
DFT(A, 1), DFT(B, 1);
static comp tA[N], tB[N], tC[N], tD[N];
for (int i = 0; i < lim; ++i) {
int j = (lim - i) & (lim - 1);
comp ta = (A[i] + conj(A[j])) * comp { 0.5, 0 },
tb = (A[i] - conj(A[j])) * comp { 0, -0.5 },
tc = (B[i] + conj(B[j])) * comp { 0.5, 0 },
td = (B[i] - conj(B[j])) * comp { 0, -0.5 };
tA[i] = ta * tc; tB[i] = ta * td;
tC[i] = tb * tc; tD[i] = tb * td;
}
for (int i = 0; i < lim; ++i) {
A[i] = tA[i] + tB[i] * comp { 0, 1 };
B[i] = tC[i] + tD[i] * comp { 0, 1 };
}
DFT(A, -1), DFT(B, -1);
for (int i = 0; i < lim; ++i) {
long long ta = (long long)(A[i].x + 0.5) % P;
long long tb = (long long)(A[i].y + 0.5) % P,
tc = (long long)(B[i].x + 0.5) % P,
td = (long long)(B[i].y + 0.5) % P;
R[i] = (ta + ((tb + tc) << 15) + (td << 30)) % P;
}
}

int n, K, L, X, Y;
long long omega;

struct Matrix {
long long mat[3][3];
};

Matrix def, mat_I, mat_S;

inline Matrix operator*(const Matrix &a, const Matrix &b) {
Matrix ret; memset(ret.mat, 0, sizeof(ret.mat));
for (int i = 0; i < 3; ++i)
for (int k = 0; k < 3; ++k)
for (int j = 0; j < 3; ++j)
ret.mat[i][j] += a.mat[i][k] * b.mat[k][j];
for (int i = 0; i < 3; ++i)
for (int j = 0; j < 3; ++j)
ret.mat[i][j] %= P;
return ret;
}

inline long long fpow(long long x, long long y) {
long long ret = 1;
for ( ; y; y >>= 1, x = x * x % P)
if (y & 1) ret = ret * x % P;
return ret;
}

inline int get_root() {
static int buc[N], tot;
for (int i = 2; i * i <= P - 1; ++i) {
if (!((P - 1) % i)) {
buc[++tot] = i;
if (i * i != P - 1)
buc[++tot] = (P - 1) / i;
}
}
for (int i = 1; i <= P - 1; ++i) {
int ok = 1;
for (int j = 1; j <= tot; ++j)
if (fpow(i, buc[j]) == 1) {
ok = 0; break;
}
if (ok) return i;
}
return -114514;
}


inline Matrix fpow(Matrix x, int y) {
Matrix ret = x;
for (--y; y; y >>= 1, x = x * x)
if (y & 1) ret = ret * x;
return ret;
}

inline Matrix calc_A(int x) {
Matrix qwq; memset(qwq.mat, 0, sizeof(qwq.mat));
long long w = fpow(omega, x);
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
qwq.mat[i][j] = (def.mat[i][j] * w + mat_I.mat[i][j]) % P;
return fpow(qwq, L);
}

long long A[N], B[N], C[N];

int main() {
// freopen("data", "r", stdin);
std::cin >> n >> K >> L >> X >> Y >> P;
omega = fpow(get_root(), (P - 1) / K);
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
std::cin >> def.mat[i][j];
for (int i = 0; i < n; ++i)
mat_I.mat[i][i] = 1;
mat_S.mat[0][X - 1] = 1;
for (int i = 0; i < K; ++i)
A[i] = (mat_S * calc_A(i)).mat[0][Y - 1];
for (int i = 0; i < K; ++i)
A[i] = A[i] * fpow(omega, 1ll * i * (i - 1) / 2) % P;
for (int i = 0; i <= K * 2; ++i)
B[K * 2 - i] = fpow(fpow(omega, 1ll * i * (i - 1) / 2), P - 2);
init(K * 2 + 5);
MTT(A, B, C);
for (int i = 0; i < K; ++i) {
long long ans = fpow(K, P - 2) * fpow(omega, 1ll * i * (i - 1) / 2)
% P * C[K * 2 - i] % P;
printf("%lld\n", ans);
}
return 0;
}