0%

题目链接:Codeforces 86D

莫队算法模板题,渐进时间复杂度 \(O(n\sqrt{n})\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const int MAX_N = int(2E5) + 5;
const int MAX_Q = int(2E5) + 5;
const int MAX_S = int(1E6) + 5;
int n, t, a[MAX_N];
int bsz, bt, lb[MAX_N], rb[MAX_N], blk[MAX_N];
struct Query {
int l, r, id;
LL ans;
inline void input(int x) {
id = x; ans = 0;
scanf("%d%d", &l, &r);
}
inline friend bool operator < (const Query &u, const Query &v) {
if (blk[u.l] == blk[v.l]) return u.r < v.r;
else return blk[u.l] < blk[v.l];
}
} q[MAX_Q];
int cnt[MAX_S];
LL sum;
inline void add(int x) {
LL ori = 1LL * cnt[x] * cnt[x] * x;
++cnt[x];
LL tar = 1LL * cnt[x] * cnt[x] * x;
sum += (tar - ori);
}
inline void remove(int x) {
LL ori = 1LL * cnt[x] * cnt[x] * x;
--cnt[x];
LL tar = 1LL * cnt[x] * cnt[x] * x;
sum += (tar - ori);
}
int main() {
scanf("%d%d", &n, &t);
for (int i = 1; i <= n; ++i) scanf("%d", a + i);
for (int i = 1; i <= t; ++i) q[i].input(i);
bsz = int(sqrt(n));
int l, r;
for (l = 1; l <= n; l = r + 1) {
r = min(n, l + bsz - 1); ++bt;
lb[bt] = l; rb[bt] = r;
for (int i = l; i <= r; ++i) blk[i] = bt;
}
sort(q + 1, q + t + 1);
int curL = 0, curR = 0;
for (int i = 1; i <= t; ++i) {
while (curL < q[i].l) remove(a[curL]), ++curL;
while (curL > q[i].l) --curL, add(a[curL]);
while (curR < q[i].r) ++curR, add(a[curR]);
while (curR > q[i].r) remove(a[curR]), --curR;
q[i].ans = sum;
}
sort(q + 1, q + t + 1, [](const Query &u, const Query &v) { return u.id < v.id; });
for (int i = 1; i <= t; ++i) printf("%lld\n", q[i].ans);
return 0;
}

题目链接:ICPC 2019 上海网络赛B题

对区间 \([l,r]\) 离散化成四个点 \(l - 1\)\(l\)\(r-1\)\(r\),然后分块维护,渐进时间复杂度 \(O(T \times (4m \log 4m + 4m \sqrt{4m}))\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const int MAX_N = 1000 + 5;
int T, n, m, d[MAX_N * 4], opL[MAX_N], opR[MAX_N], tot, dL[MAX_N], dR[MAX_N];
int bsz, bt, lb[MAX_N], rb[MAX_N], blk[MAX_N * 4], tag[MAX_N], a[MAX_N * 4];
int main() {
scanf("%d", &T);
for (int cs = 1; cs <= T; ++cs) {
scanf("%d%d", &n, &m);
tot = 0;
d[++tot] = 0; d[++tot] = n;
for (int i = 1; i <= m; ++i) {
scanf("%d%d", opL + i, opR + i);
d[++tot] = opL[i]; d[++tot] = opL[i] - 1;
d[++tot] = opR[i]; d[++tot] = opR[i] - 1;
}
sort(d + 1, d + tot + 1);
tot = unique(d + 1, d + tot + 1) - d - 1;
for (int i = 1; i <= m; ++i) {
dL[i] = lower_bound(d + 1, d + tot + 1, opL[i]) - d;
dR[i] = lower_bound(d + 1, d + tot + 1, opR[i]) - d;
}
bsz = ceil(int(sqrt(tot))); bt = 0;
int bl, br;
for (bl = 1; bl <= tot; bl = br + 1) {
br = min(tot, bl + bsz - 1); ++bt;
lb[bt] = bl; rb[bt] = br; tag[bt] = 0;
for (int i = bl; i <= br; ++i) {
blk[i] = bt; a[i] = 0;
}
}
int sb, eb;
for (int q = 1; q <= m; ++q) {
sb = blk[dL[q]]; eb = blk[dR[q]];
if (sb == eb) {
for (int i = dL[q]; i <= dR[q]; ++i) ++a[i];
continue;
}
for (int i = dL[q]; i <= rb[sb]; ++i) ++a[i];
for (int i = sb + 1; i < eb; ++i) ++tag[i];
for (int i = lb[eb]; i <= dR[q]; ++i) ++a[i];
}
int cnt = 0, tl, tr, len;
for (int i = 1; i <= tot; ++i) {
// printf("d[%d] = %d, a = %d\n", i, d[i], a[i] + tag[blk[i]]);
if ((a[i] + tag[blk[i]]) & 1) {
tl = d[i - 1] + 1;
tr = d[i];
len = max(tr - tl + 1, 0);
cnt += len;
}
}
printf("Case #%d: %d\n", cs, cnt);
}
return 0;
}

题目链接:HDU 6534

考虑莫队算法,对 \(a_i\)\(a_i + k\)\(a_i - k\) 进行离散化,用树状数组维护一个值域的前缀和。渐进时间复杂度 \(O(n \times \sqrt{n} \times \log n)\)

这里要注意我们对于每一个 \(a_i\),要先预处理出 \(a_i - k\)\(a_i\)\(a_i + k\) 离散化之后的值,然后在 add 和 remove 操作的时候就可以 \(O(1)\) 得到它们离散化之后的结果,否则会TLE。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
inline void getInt(int &x) {
x = 0; char ch = getchar(); int sgn = 1;
while (!isdigit(ch)) { if (ch == '-') sgn = -1; ch = getchar(); }
while (isdigit(ch)) { x = x * 10 + ch - '0'; ch = getchar(); }
x *= sgn;
}
const int MAX_N = 27000 + 5;
int n, m, k, a[MAX_N];
int qId[MAX_N], qIdSub[MAX_N], qIdAdd[MAX_N];
int d[MAX_N * 3], tot;
int c[MAX_N * 3];
inline int getId(const int &u) {
return lower_bound(d + 1, d + tot + 1, u) - d;
}
inline int lowbit(const int &x) {
return x & (-x);
}
inline void update(int pos, const int &v) {
while (pos <= tot) {
c[pos] += v;
pos += lowbit(pos);
}
}
inline int query(int pos) {
int ret = 0;
while (pos >= 1) {
ret += c[pos];
pos -= lowbit(pos);
}
return ret;
}
int bsz, bt, lb[MAX_N], rb[MAX_N], blk[MAX_N];
struct Query {
int l, r, id;
friend inline bool operator < (const Query &u, const Query &v) {
if (blk[u.l] == blk[v.l]) return u.r < v.r;
else return blk[u.l] < blk[v.l];
}
} q[MAX_N];
int ans[MAX_N];
int sum;
inline void add(const int &u) {
if (u == 0) return;
sum += (query(qIdAdd[u]) - query(qIdSub[u] - 1));
update(qId[u], 1);
}
inline void remove(const int &u) {
if (u == 0) return;
update(qId[u], -1);
sum -= (query(qIdAdd[u]) - query(qIdSub[u] - 1));
}
int main() {
getInt(n), getInt(m), getInt(k);
for (int i = 1; i <= n; ++i) getInt(a[i]);
for (int i = 1; i <= n; ++i) {
d[++tot] = a[i] - k;
d[++tot] = a[i];
d[++tot] = a[i] + k;
}
sort(d + 1, d + tot + 1);
tot = unique(d + 1, d + tot + 1) - d - 1;
for (int i = 1; i <= n; ++i) {
qId[i] = getId(a[i]);
qIdSub[i] = getId(a[i] - k);
qIdAdd[i] = getId(a[i] + k);
}
bsz = ceil(sqrt(n));
int bl, br;
for (bl = 1; bl <= n; bl = br + 1) {
++bt;
br = min(bl + bsz - 1, n);
for (int i = bl; i <= br; ++i) blk[i] = bt;
}
for (int i = 1; i <= m; ++i) {
q[i].id = i;
getInt(q[i].l), getInt(q[i].r);
}
sort(q + 1, q + m + 1);
a[0] = -1;
int curL = 0, curR = 0;
for (int i = 1; i <= m; ++i) {
while (curL < q[i].l) remove(curL), ++curL;
while (curL > q[i].l) --curL, add(curL);
while (curR < q[i].r) ++curR, add(curR);
while (curR > q[i].r) remove(curR), --curR;
ans[q[i].id] = sum;
}
for (int i = 1; i <= m; ++i) printf("%d\n", ans[i]);
return 0;
}

题目链接:BZOJ 2038

莫队算法模板题,将原序列分成 \(\sqrt{n}\) 块,同一块内的询问按照右端点大小排序,离线询问,渐进时间复杂度 \(O(n \times \sqrt{n})\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const int MAX_N = 50000 + 5;
int n, m, c[MAX_N];
int bsz, lb[MAX_N], rb[MAX_N], blk[MAX_N], tot;
struct Query {
int l, r, id;
friend bool operator < (const Query &u, const Query &v) {
if (blk[u.l] != blk[v.l]) return blk[u.l] < blk[v.l];
else return u.r < v.r;
}
} q[MAX_N];
LL num[MAX_N], sum;
LL ansU[MAX_N], ansD[MAX_N];
inline void add(int cid) {
if (cid == 0) return;
LL ori = num[cid] * (num[cid] - 1) / 2;
++num[cid];
LL tar = num[cid] * (num[cid] - 1) / 2;
sum += (tar - ori);
}
inline void remove(int cid) {
if (cid == 0) return;
LL ori = num[cid] * (num[cid] - 1) / 2;
--num[cid];
LL tar = num[cid] * (num[cid] - 1) / 2;
sum += (tar - ori);
}
int main() {
scanf("%d%d", &n, &m);
for (int i = 1; i <= n; ++i) scanf("%d", c + i);
bsz = round(sqrt(n));
int l, r;
for (l = 1; l <= n; l = r + 1) {
++tot;
r = min(l + bsz - 1, n);
for (int i = l; i <= r; ++i) blk[i] = tot;
lb[tot] = l; rb[tot] = r;
}
/*
for (int i = 1; i <= n; ++i) {
printf("%d => %d-th block\n", i, blk[i]);
}
*/
for (int i = 1; i <= m; ++i) {
q[i].id = i;
scanf("%d%d", &q[i].l, &q[i].r);
}
sort(q + 1, q + m + 1);
int curL = 0, curR = 0;
for (int i = 1; i <= m; ++i) {
// printf("sum = %d, curL = %d, curR = %d\n", sum, curL, curR);
while (curL < q[i].l) remove(c[curL]), ++curL;
while (curL > q[i].l) --curL, add(c[curL]);
while (curR < q[i].r) ++curR, add(c[curR]);
while (curR > q[i].r) remove(c[curR]), --curR;
LL len = q[i].r - q[i].l + 1;
LL u = len * (len - 1) / 2;
if (u == 0 || sum == 0) {
ansU[q[i].id] = 0;
ansD[q[i].id] = 1;
} else {
LL g = __gcd(u, sum);
ansU[q[i].id] = sum / g;
ansD[q[i].id] = u / g;
}
}
for (int i = 1; i <= m; ++i) printf("%lld/%lld\n", ansU[i], ansD[i]);
return 0;
}

题目链接:Codeforce 1217E

首先我们考虑一个Balanced Multiset总和的每个位置可能存在两种情况:

  • 总和\(s_i\)就是这个集合内某个数字\(x\)的第\(i\)位,除了\(x\)之外的其余数字的第\(i\)位均为\(0\)
  • 总和\(s_i\)就是这个集合内某个数字\(x\)的第\(i\)位,但是不满足除了\(x\)之外的其余数字的第\(i\)位均为\(0\),所以\(\sum x_i \bmod 10 = s_i\),这种情况下一定会产生进位
  • 经过分析可以知道第二种情况是不可能的,我们可以这样反证:如果第\(i\)位置是这种情况,第\(i+1\)位是会接受到第\(i\)位的进位,那么第\(i+1\)位就不可能是第一种情况,即第\(i+1\)位会产生进位,以此类推,最前面一位也会产生进位,假设这个进位的位置为\(p\),这显然是不符合定义的,因为原集合当中\(p\)位置都是\(0\),故不可能成立。

因此我们可以得到一个推论,任何一个Unbalanced Multiset,无论怎么添加元素,这个集合依旧是Unbalanced。那么我们要找一个和最小的Unbalanced Multiset,其实也就是找一个两个元素的集合,使得这两个元素不满足上述的第一类情况。简而言之,对于每个询问,我们需要找到一组\((a_i,a_j)\),满足\(l \leq i < j \leq r\),使得第一类Balanced条件被破坏,最小的\(a_i+a_j\)就是问题的答案。

考虑用线段树维护区间的一些信息。我们可以把一个十进制数按位置拆分,对于要维护的区间用一个长度为\(\lceil \log_{10} \max\{ a_i \} \rceil\)的数组来记录区间内的数中,那些不为\(0\)的位置被哪些数使用,这些数的最小值是多少。接着还需要一个变量来维护区间最小的\(a_i+a_j\)。在Push-Up操作的时候,第一个数组很好维护,我们考虑如何维护第二个量,这里记录为\(f_o\),首先\(f \leq \min \{ f_l, f_r \}\)\(f_o\)\(f_l\)\(f_r\)分别是线段树当前的根、左子树和右子树维护的\(a_i+a_j\)的值,这意味着在左半个区间选择两个或者在右半个区间选择两个,当然我们也可以在左区间选择一个,右区间选择一个,这个时候就要用我们维护的第一个量来更新第二个量,详见代码。

渐进时间复杂度\(O((n+m)\log n \times \lceil \log_{10} \max\{ a_i \} \rceil)\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const int MAX_N = 200000 + 5;
const int MAX_S = 10;
const int INF = INT_MAX;
int tree[MAX_N * 4][MAX_S], opt[MAX_N * 4];
inline int ls(int o) {
return o << 1;
}
inline int rs(int o) {
return o << 1 | 1;
}
inline void pushUp(int o) {
opt[o] = min(opt[ls(o)], opt[rs(o)]);
for (int i = 0; i < 10; ++i) {
tree[o][i] = min(tree[ls(o)][i], tree[rs(o)][i]);
if (tree[ls(o)][i] != INF && tree[rs(o)][i] != INF) {
opt[o] = min(opt[o], tree[ls(o)][i] + tree[rs(o)][i]);
}
}
}
void build(int o, int l, int r) {
if (l == r) {
opt[o] = INF;
for (int i = 0; i < 10; ++i) {
tree[o][i] = INF;
}
return;
}
int mid = (l + r) >> 1;
build(ls(o), l, mid);
build(rs(o), mid + 1, r);
pushUp(o);
}
void update(int o, int l, int r, int pos, int val) {
if (l == r) {
int v = val;
for (int i = 0; i < 10; ++i) {
int u = v % 10;
if (!u) tree[o][i] = INF;
else tree[o][i] = val;
v /= 10;
}
return;
}
int mid = (l + r) >> 1;
if (pos <= mid) update(ls(o), l, mid, pos, val);
else update(rs(o), mid + 1, r, pos, val);
pushUp(o);
}
struct Node {
int uOpt, uT[MAX_S];
Node() {
for (int i = 0; i < 10; ++i) uT[i] = INF;
uOpt = INF;
}
};
Node query(int o, int l, int r, int L, int R) {
if (L <= l && r <= R) {
Node u;
for (int i = 0; i < 10; ++i) {
u.uT[i] = tree[o][i];
}
u.uOpt = opt[o];
return u;
}
int mid = (l + r) >> 1;
if (R <= mid) return query(ls(o), l, mid, L, R);
if (L > mid) return query(rs(o), mid + 1, r, L, R);
Node uL = query(ls(o), l, mid, L, R), uR = query(rs(o), mid + 1, r, L, R);
Node u;
u.uOpt = min(uL.uOpt, uR.uOpt);
for (int i = 0; i < 10; ++i) {
u.uT[i] = min(uL.uT[i], uR.uT[i]);
if (uL.uT[i] != INF && uR.uT[i] != INF) u.uOpt = min(u.uOpt, uL.uT[i] + uR.uT[i]);
}
return u;
}
int n, m;
int a[MAX_N];
int main() {
scanf("%d%d", &n, &m);
build(1, 1, n);
for (int i = 1; i <= n; ++i) scanf("%d", a + i), update(1, 1, n, i, a[i]);
for (int i = 1; i <= m; ++i) {
int op, u, v;
scanf("%d%d%d", &op, &u, &v);
if (op == 1) update(1, 1, n, u, v);
else {
Node ans = query(1, 1, n, u, v);
if (ans.uOpt == INF) puts("-1");
else printf("%d\n", ans.uOpt);
}
}
return 0;
}

题目链接:Codeforces 1217D

题目要求给出一种边的染色方案,使得所有的环上必须存在至少两种颜色,并且用的颜色数量最少。

在有向图上DFS的时候,每出现一个返祖边,就出现一个环,只要让返祖边的颜色与其他的边不一样即可,所以最多有两种颜色。我们可以在用栈维护当前DFS的链,然后寻找返祖边即可。

渐进时间复杂度 \(O(n + m)\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
struct Edge {
int to, id;
};
const int MAX_N = 5000 + 5;
vector <Edge> g[MAX_N];
int n, m, c[MAX_N];
int dfn[MAX_N], idx;
int ins[MAX_N];
stack <int> stk;
void dfs(int u) {
dfn[u] = ++idx;
stk.push(u); ins[u] = 1;
for (int i = 0; i < g[u].size(); ++i) {
Edge &e = g[u][i];
if (!dfn[e.to]) {
c[e.id] = 1;
dfs(e.to);
} else if (ins[e.to]) {
c[e.id] = 2;
}
}
stk.pop(); ins[u] = 0;
}
int main() {
scanf("%d%d", &n, &m);
int u, v;
for (int i = 1; i <= m; ++i) {
scanf("%d%d", &u, &v);
g[u].push_back({v, i});
}
for (int i = 1; i <= n; ++i) {
if (!dfn[i]) dfs(i);
}
int cnt = 1;
for (int i = 1; i <= m; ++i) {
if (!c[i]) c[i] = 1;
if (c[i] == 2) cnt = 2;
}
printf("%d\n", cnt);
for (int i = 1; i <= m; ++i) {
printf("%d%c", c[i], i == m ? '\n' : ' ');
}
return 0;
}

题目链接:ICPC 2019 徐州赛区网络赛 B

所有的答案只可能是\(x_i\)\(x_i + 1\),对这些点离散化之后,使用并查集维护从当前点开始向右第一个合法位置,合并的时候向右合并即可。

渐进时间复杂度\(O(q \cdot \alpha(2n))\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const int MAX_N = 2000000 + 5;
int n, q, z[MAX_N], x[MAX_N], d[MAX_N], tot;
int f[MAX_N];
void init() {
for (int i = 1; i <= tot; ++i) f[i] = i;
}
int find(int u) {
return u == f[u] ? u : f[u] = find(f[u]);
}
inline int merge(int u, int v) {
f[find(u)] = find(v);
}
int main() {
scanf("%d%d", &n, &q);
for (int i = 1; i <= q; ++i) {
scanf("%d%d", z + i, x + i);
d[++tot] = x[i];
d[++tot] = x[i] + 1;
}
sort(d + 1, d + tot + 1);
tot = unique(d + 1, d + tot + 1) - d - 1;
init();
for (int i = 1; i <= q; ++i) {****
if (z[i] == 1) {
int pos = lower_bound(d + 1, d + tot + 1, x[i]) - d;
int nex = lower_bound(d + 1, d + tot + 1, x[i] + 1) - d;
merge(pos, nex);
} else {
int pos = lower_bound(d + 1, d + tot + 1, x[i]) - d;
int u = find(pos);
printf("%d\n", d[u] <= n ? d[u] : -1);
}
}
return 0;
}

题目链接:ICPC 2019 徐州赛区网络赛 E

对于每个\(a_i\),我们要找一个最大的\(j > i\),满足\(a_j \geq a_i + m\)

考离线询问。对于一个点\(a_i\),加入一个询问和一个修改——在\(i\)位置插入\(a_i\),和一个查询——比\(a_i + m\)大的最大的\(j\)。我们可以对修改和查询进行排序,用优先队列维护比当前\(a_i + m\)大的最大的\(j\),然后离线回答即可。

渐进时间复杂度\(O(n \log n)\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const int MAX_N = 500000 + 5;
const int UPD = 0;
const int QRY = 1;
struct Query {
int type, val, pos;
friend bool operator < (const Query &u, const Query &v) {
if (u.val != v.val) return u.val > v.val;
else if (u.pos != v.pos) u.pos > v.pos;
else return u.type < v.type;
}
} task[MAX_N * 2];
int n, m, a[MAX_N], ans[MAX_N];
priority_queue <int> q;
int main() {
scanf("%d%d", &n, &m);
for (int i = 1; i <= n; ++i) {
scanf("%d", a + i);
task[i] = {UPD, a[i], i};
task[i + n] = {QRY, a[i] + m, i};
}
sort(task + 1, task + 2 * n + 1);
// for (int i = 1; i <= 2 * n; ++i) printf("type = %d, val = %d, pos = %d\n", task[i].type, task[i].val, task[i].pos);
for (int i = 1; i <= 2 * n; ++i) {
if (task[i].type == UPD) {
q.push(task[i].pos);
} else {
if (q.empty() || q.top() <= task[i].pos) ans[task[i].pos] = -1;
else ans[task[i].pos] = q.top() - task[i].pos - 1;
}
}
for (int i = 1; i <= n; ++i) printf("%d%c", ans[i], i == n ? '\n' : ' ');
return 0;
}

经典问题:记 \(f(x) = \sum_{i=0}^{n} a_i x^i\)\(g(x) = \sum_{i=0}^{m} b_i x^i\),其中\(1\leq n, m \leq 10^5\),求\(f \cdot g (x)\)

显然如果我们做朴素的多项式乘法,时间复杂度是 \(O(nm)\) 的。我们可以使用快速傅立叶变换(FFT)在 \(O(c \log c)\) 的时间内解决此问题,其中 \(c\) 是不小于 \(n+m\) 的最小的 \(2\) 的幂。

本文是笔者学习FFT的笔记和一些思考,不推荐作为教程食用。

在学习FFT之前,我们首先要理解一些概念。

多项式的系数表示和点值表示

对于多项式\(f(x) = \sum_{i=0}^{n} a_i x^i\),把系数写成向量的形式\(\vec{a} = [a_0, a_1, \cdots, a_n]\),则\(\vec{a}\)称为多项式的系数表示。我们输入一个\(x\),对\(x\)\(f(x)\)可以得到一个\(y\),表示这个多项式函数在\(x\)点处的值。

我们知道,\(n+1\)个点可以唯一确定一个\(n\)次多项式,那么我们可以使用\(n+1\)个序偶对\((x_i, y_i)\)来表示这个多项式,这称为多项式的点值表示。如何从点值表示得到任意\(x\)处的函数值呢?可以使用拉格朗日差值法,由于与FFT关系不大,故这里不做具体说明。

离散傅立叶变换(DFT)与反变换(IDFT)

对于一个系数表示\(\vec{a} = [a_0, a_1, \cdots, a_n]\),对其做离散傅立叶变换得到:

\[ {\rm DFT}(\vec{a}) = [f(w_{n}^{0}),f(w_{n}^{1}),f(w_{n}^{2})\cdots,f(w_{n}^{n-1})]  \]

设其表示的多项式为\(\hat{f}(x)\),我们可以对其做离散傅立叶变换的反变换重新的到\(f(x)\),即向量\(\vec{a}\)

\[ \vec{a} = {\rm IDFT} ({\rm DFT} (\vec{a})) = \frac{1}{n} [\hat{f}(w_{n}^{0}),\hat{f}(w_{n}^{-1}),\hat{f}(w_{n}^{-2})\cdots,\hat{f}(w_{n}^{-(n-1)})] \]

其中\(w_{n}^{k}\)\(x^n = 1\)在复数域中的根,称为\(n\)次单位根,\(k=1\)的时候又称为本原单位根。

多项式乘法的原理

考虑\(n\)次的多项式\(f(x)\)\(m\)次多项式相乘会产生一个\(n+m\)次的多项式,所以我们只需要获得两个多项式的\(n+m+1\)个点形成新的多项式的点值表示,这一点可以取\(n+m+1\)次单位根做\(\rm DFT\),然后对得到的向量做\(\rm IDFT\)就可以得到目标多项式的系数表示了。即:

\[\vec{c} =  {\rm IDFT} [{\rm DFT}(\vec{a}) \circ {\rm DFT}(\vec{b})]  = \vec{a} * \vec{b}\]

这里也验证了傅立叶变换时域卷积等于频域乘积的性质。

快速求解DFT和IDFT的方法

我们知道卷积的求解和差值的求解都是\(O(n^2)\)的,和朴素的求法相比并没有提升,我么考虑用分治的方法来优化DFT。 考虑\(w_n\)\(w_{2n}\),有这样的两条性质:

\[ \left\{ \begin{aligned} & (w_{2n}^{k})^2 = w_{n}^{k} \\ & w_{2n}^{n+k} = -w_{2n}^{k}  \end{aligned} \right . \]

这里可以在复平面上画图理解,当然《复变函数》中也有该性质的证明,这里不必过于纠结,会用即可。

\(f(x) = \sum_{i=0}^{n} a_i x^i\)\(n=2m\),我们考虑将其项的次数按照奇偶进行分类:

\[ \begin{aligned} f(x) = & \sum_{i=0}^{n} a_i x^i \\ = & \sum_{i=0}^{m} a_{2i} x^{2i} + \sum_{i=0}^{m} a_{2i+1} x^{2i+1} \\ = & \sum_{i=0}^{m} a_{2i} x^{2i} + x \sum_{i=0}^{m} a_{2i+1} x^{2i} \\ = & f_0(x^2) + x f_1(x^2)   \end{aligned} \]

假设\(0 \leq k < m\),那么对于\(w_{n}^{k}\)

\[\begin{aligned} f(w_{n}^{k}) = & f_0((w_{n}^{k})^2) + w_{n}^{k} f_1((w_{n}^{k})^2) \\ = & f_0(w_{m}^{k}) + w_{n}^{k} f_1(w_{m}^{k}) \end{aligned} \]

对于\(w_{n}^{m+k}\)

\[ \begin{aligned} f(w_{n}^{m+k}) = & f_0((w_{n}^{m+k})^2) + w_{n}^{m+k} f_1((w_{n}^{m+k})^2) \\ = & f_0(w_{m}^{k}) - w_{n}^{k} f_1(w_{m}^{k}) \end{aligned} \]

因此就可以用分治的方法快速计算DFT了。

位逆序置换与非递归FFT

为了加速FFT的过程,我们可以将FFT写成非递归的形式,考虑展开递归树。

假设我们有8个数\([(0,1,2,3,4,5,6,7)]\),经过一次递归之后变成\([(0,2,4,6), (1,3,5,7)]\),经过第二次递归之后变成\([(0,4),(2,6),(1,5),(3,7)]\),经过第三次递归之后变成\([(0), (4), (2), (6), (1), (5),(3),(7)]\),我们发现第\(i\)位是\(i\)的二进制数翻转对应的那个十进制数字,因此我们可以把每个数字放在它最后的位置上,然后逐层向上还原,写成非递归版的FFT。

一份常数比较大的模板

洛谷上\(10^6\)级别的多项式乘法有两个点是TLE的,LOJ上\(10^5\)级别的多项式乘法通过没什么问题。(待后续更新)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
typedef complex <double> CP;
const int MAX_N = int(2.1E6) + 5;
const double PI = acos(-1);
namespace FFT {
int n, aSz, bSz;
CP a[MAX_N], b[MAX_N], omg[MAX_N], inv[MAX_N];
void init() {
for (int i = 0; i < n; ++i) {
omg[i] = CP(cos(2 * PI * i / n), sin(2 * PI * i / n));
inv[i] = conj(omg[i]);
}
}
void fft(CP *a, CP *omg) {
int lim = 0;
while ((1 << lim) < n) ++lim;
for (int i = 0; i < n; ++i) {
int t = 0;
for (int j = 0; j < lim; ++j) {
if((i >> j) & 1) t |= (1 << (lim - j - 1));
}
if (i < t) swap(a[i], a[t]);
}
for (int l = 2; l <= n; l <<= 1) {
int m = l / 2;
for (CP *p = a; p != a + n; p += l) {
for (int i = 0; i < m; ++i) {
CP t = omg[n / l * i] * p[i + m];
p[i + m] = p[i] - t;
p[i] += t;
}
}
}
}
void run() {
n = 1;
while (n < aSz + bSz) n <<= 1;
init();
// printf("n = %d\n", n);
fft(a, omg);
fft(b, omg);
for (int i = 0; i < n; ++i) a[i] *= b[i];
fft(a, inv);
int len = aSz + bSz - 1;
for (int i = 0; i < len; ++i) {
printf("%d%c", int(round(a[i].real() / n)), i == len - 1 ? '\n' : ' ');
}
}
};
int main() {
scanf("%d%d", &FFT::aSz, &FFT::bSz);
++FFT::aSz, ++FFT::bSz;
int u;
for (int i = 0; i < FFT::aSz; ++i) {
scanf("%d", &u);
FFT::a[i].real(u);
}
for (int i = 0; i < FFT::bSz; ++i) {
scanf("%d", &u);
FFT::b[i].real(u);
}
FFT::run();
return 0;
}

题目链接:ICPC 2019 南京网络赛 E 题

思路

\(f_n(k)\)化简得: \[ f_n(k) \begin{aligned} & = \sum_{u=1}^{n} \lfloor \frac{n}{u} \rfloor ^ k  \sum_{d|u} d^2 \mu(\frac{u}{d}) \end{aligned} \] 代入目标式并交换求和次序: \[\begin{aligned} & \sum_{i=2}^{k} \sum_{u=1}^{n} \lfloor \frac{n}{u} \rfloor ^ k  \sum_{d|u} d^2 \mu(\frac{u}{d}) \\ =& \sum_{u=1}^{n}\sum_{i=2}^{k} \lfloor \frac{n}{u} \rfloor ^ k  \sum_{d|u} d^2 \mu(\frac{u}{d}) \\ =& \sum_{u=1}^{n} t(\lfloor \frac{n}{u} \rfloor)  ({\rm Id}^2 * \mu)(u) \end{aligned} \] 其中\(t(\lfloor \frac{n}{u} \rfloor)\)表示一个等比数列求和的结果,对于比较大的 \(k\),可以考虑欧拉函数降幂,然后可以整除分块求解。坑点是要特判公比为 \(1\) 的情况,公比为 \(1\) 的时候的 \(k\) 的计算值是 \(k \bmod P\) 而不是 \(k \bmod \varphi(P)\)。 下面考虑求式中 Direchlet 卷积的前缀和。\(n\)的范围是\(10^9\),考虑使用亚线性筛。

\(f(x) =({\rm Id}^2 * \mu)(x)\)\(g(x) = 1\),则\(f * g = {\rm Id}^2 * \mu * 1 = {\rm Id}^2 * \epsilon = {\rm Id}^2\),可求前缀和,考虑杜教筛。 设\(F(n) = \sum_{x=1}^{n} f(x)\),则可以对\(f*g\)求和推得: \[ F(n) = \frac{n(n+1)(2n+1)}{6} - \sum_{d=2}^{n} F(\lfloor \frac{n}{d} \rfloor) \] 线性筛预处理\(n \leq 10^6\),然后杜教筛即可。

渐进时间复杂度\(O(T (n^{\frac{2}{3}} + \sqrt{n} \times \log P) )\)

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
const LL MAX_LEN = 1000000 + 5;
const LL P = LL(1E9) + 7;
const LL PHI_P = LL(1E9) + 6;
const LL INV_6 = 166666668;
inline LL getPow(LL u, LL v) {
LL ret = 1;
while (v) {
if (v & 1) ret = ret * u % P;
u = u * u % P;
v >>= 1;
}
return ret;
}
inline LL add(LL u, LL v, LL x = 0, LL y = 0) { return (((u + v) % P) + ((x + y) % P) % P); }
inline LL sub(LL u, LL v) { return (((u - v) % P) + P) % P; }
inline LL mul(LL u, LL v, LL x = 1, LL y = 1) { return (((u * v) % P) * ((x * y) % P) % P); }
bool vis[MAX_LEN];
LL p[MAX_LEN], low[MAX_LEN], tot;
LL f[MAX_LEN];
void linearSieve() {
vis[0] = vis[1] = 1;
low[1] = f[1] = 1;
for (LL i = 2; i < MAX_LEN; ++i) {
if (!vis[i]) {
p[++tot] = i;
f[i] = sub(i * i % P, 1);
low[i] = i;
}
for (LL j = 1; j <= tot && i * p[j] < MAX_LEN; ++j) {
vis[i * p[j]] = 1;
if (i % p[j] == 0) {
low[i * p[j]] = low[i] * p[j];
if (low[i] == i) f[i * p[j]] = mul(p[j], p[j]) * f[i] % P;
else f[i * p[j]] = f[i / low[i]] * f[p[j] * low[i]] % P;
break;
} else {
f[i * p[j]] = f[i] * f[p[j]] % P;
low[i * p[j]] = p[j];
}
}
}
for (LL i = 1; i < MAX_LEN; ++i) f[i] = (f[i] + f[i - 1]) % P;
}
unordered_map <LL, LL> hashS;
LL F(LL x) {
if (x < MAX_LEN) return f[x];
if (hashS.find(x) != hashS.end()) return hashS[x];
LL ret = mul(x, x + 1, x * 2 + 1, INV_6);
LL l, r;
for (l = 2; l <= x; l = r + 1) {
r = x / (x / l);
ret = sub(ret, F(x / l) * (r - l + 1) % P);
ret = ((ret % P) + P) % P;
}
return hashS[x] = ret;
}
const LL MAX_N = 100000 + 5;
LL T, N, K, SP;
char str[MAX_N];
int main() {
linearSieve();
// for (int i = 1; i <= 10; ++i) printf("f[%d] = %lld\n", i, f[i] - f[i - 1]);
scanf("%lld", &T);
for (LL cs = 1; cs <= T; ++cs) {
hashS.clear();
scanf("%lld", &N);
// memset(str, '\0', sizeof str);
scanf("%s", str);
// printf("N = %lld, K = %s\n", N, str);
LL len = strlen(str);
K = 0; SP = 0;
for (LL i = 0; i < len; ++i) {
K = (K * 10 + (str[i] - '0')) % PHI_P;
SP = (SP * 10 + (str[i] - '0')) % P;
}
K = ((K - 1) % PHI_P + PHI_P) % PHI_P;
SP = ((SP - 1) % P + P) % P;
LL l, r, ans = 0;
for (l = 1; l <= N; l = r + 1) {
r = N / (N / l);
LL b = N / l;
LL invDown = getPow(sub(1, b), P - 2);
LL right = sub(1, getPow(b, K));
LL con = mul(b, b, invDown, right);
if (b == 1) con = SP;
ans = add(ans, mul(con, sub(F(r), F(l - 1))));
}
printf("%lld\n", ans);
}
return 0;
}