P4173 残缺的字符串

更精彩内容看这个巨佬的博文

没想到字符串匹配还能这么玩
把比较两个字符串的过程形式化, 判断两个字符串相等\(a[i]-b[i]=0\)
普通的模式串匹配
若模式串与另一个串以\(x\)结尾的字串匹配, 即\(P(x)=\sum\limits_{i=0}^{m-1}(a[i]-b[x-m+i+1])=0\).
但是这样是有问题的,两个字符串只要字符种类和个数相同就被认为匹配了...
原因是会出现负号...
\(P(x)=\sum\limits_{i=0}^{m-1}(a[i]-b[x-m+i+1])^2\)就解决这个问题了.
大力展开式子并翻转 a 串, 可以发现第三项是卷积形式, 可以用 FFT 优化

那么通配符怎么办? 重新设计\(P(x)\), 我们只需要让通配符的匹配函数都等于\(0\)就可以了.
把 * 对应\(0\), 就是满足\(P(x)=\sum\limits_{i=0}^{m-1}(a[i]-b[x-m+i+1])^2a[i]b[x-m+i+1]=0\).
大力展开式子, \(P(x)=\sum\limits_{i=0}^{m-1}a[i]b[x-m+i+1]^3+a[i]^3b[x-m+i+1]+2a[i]^2b[x-m+i+1]^2\)
翻转一下 a 串, 即\(a[i]=a'[m-i-1]\).
\(P(x)=\sum\limits_{i=0}^{m-1}a'[m-i-1]b[x-m+i+1]^3+a'[m-i-1]^3b[x-m+i+1]+2a'[m-i-1]^2b[x-m+i+1]^2\)
可以发现这三个都是卷积形式, 所以做三次 FFT, 一次 IFFT就可以解决问题了.
时间复杂度\(\mathjaxcal{O}(n\log n)\), 常数有点大, 还有这道题卡精度, 把 eps 设置大一点可过

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include <bits/stdc++.h>
using namespace std;

#define y1 gmdllll
typedef complex<double> cp;
const int N = 1200005;
const double pi = acos(-1.0), eps = 0.05;
int n, m, rev[N];
double x1[N], y1[N];
cp a[N], b[N], p[N];
char x[N], y[N];

void fft(cp *a, int n, int inv) {
for (int i = 1; i < n; i++) if (i < rev[i]) swap(a[i], a[rev[i]]);
for (int k = 1; k < n; k <<= 1) {
cp wn(cos(pi / k), inv * sin(pi / k));
for (int i = 0; i < n; i += 2 * k) {
cp w(1, 0.0);
for (int j = 0; j < k; j++, w *= wn) {
cp x = a[i + j], y = w * a[i + j + k];
a[i + j] = x + y, a[i + j + k] = x - y;
}
}
}
if (inv < 0.0) for (int i = 0; i < n; i++) a[i] /= n;
}

int main() {
scanf("%d %d", &m, &n);
scanf("%s %s", x , y);
reverse(x, x + m);
for (int i = 0; i < m; i++) x1[i] = (x[i] == '*') ? 0.0 : (x[i] - 'a' + 1);
for (int i = 0; i < n; i++) y1[i] = (y[i] == '*') ? 0.0 : (y[i] - 'a' + 1);
int l = 0, len = 1;
while (len <= n + m) len <<= 1, l++;
for (int i = 1; i < len; i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (l - 1));

for (int i = 0; i < len; i++) a[i] = cp(x1[i], 0.0), b[i] = cp((y1[i]) * (y1[i]) * (y1[i]), 0.0);
fft(a, len, 1), fft(b, len, 1);
for (int i = 0; i < len; i++) p[i] += a[i] * b[i];

for (int i = 0; i < len; i++) a[i] = cp((x1[i]) * (x1[i]) * (x1[i]), 0.0), b[i] = cp(y1[i], 0.0);
fft(a, len, 1), fft(b, len, 1);
for (int i = 0; i < len; i++) p[i] += a[i] * b[i];

for (int i = 0; i < len; i++) a[i] = cp((x1[i]) * (x1[i]), 0.0), b[i] = cp((y1[i]) * (y1[i]), 0.0);
fft(a, len, 1), fft(b, len, 1);
for (int i = 0; i < len; i++) p[i] -= cp(2.0, 0.0) * a[i] * b[i];
fft(p, len, -1);

int v[N], top = 0;
for (int i = m - 1; i < n; i++) if (fabs(p[i].real()) < eps) v[++top] = i - m + 2;
printf("%d\n", top);
for (int i = 1; i <= top; i++) printf("%d ", v[i]);
return 0;
}

P4173 残缺的字符串
https://widsnoy.top/posts/f4c0/
作者
widsnoy
发布于
2020年9月17日
许可协议