SuffixArray

romophic-library

用途

高速な文字列検索を行う. イメージはgrep.

計算量

構築: $ O(N \log N) $ ($ N $ は対象文字列 s の長さ)
クエリ: $ O(M \log N) $ ($ M $ は検索パターン t の長さ)

使い方

構築

1
SuffixArray sufa(s);

検索

1
auto res = sufa.lower_upper_bound(t);

一致部分として, 接尾辞配列上の半開区間 [res.first, res.second) が得られる. この区間内の各 i について sufa[i] が, s の中で t が出現する開始位置である (一致件数は res.second - res.first).

実装(WIP)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/// Suffix array over a private copy of a string, with binary-search pattern
/// lookup.
///
/// Characters are compared with the built-in `char` ordering both when the
/// array is built and when it is searched, so the two always agree with each
/// other (the order of bytes >= 0x80 follows the platform's `char` signedness).
struct SuffixArray {
  std::vector<int> SA;  ///< SA[k] is the start index in `s` of the k-th suffix in sorted order.
  const std::string s;  ///< Copy of the indexed text.

  /// Builds the suffix array of `str`.
  ///
  /// One comparison sort on the first character is followed by bucket passes
  /// with len = 1, 2, 4, ... while len < |str|, each pass being linear in
  /// |str|.
  SuffixArray(const std::string &str) : s(str) {
    const int n = static_cast<int>(s.size());
    SA.resize(n);
    std::iota(std::begin(SA), std::end(SA), 0);
    // Order by first character; among equal characters the larger start index
    // (the shorter suffix) goes first.
    std::sort(std::begin(SA), std::end(SA), [&](int a, int b) {
      return s[a] == s[b] ? a > b : s[a] < s[b];
    });
    // c:       class id per start index from the previous round, seeded with
    //          the raw character codes.
    // classes: ids computed in the current round; an id is the first SA
    //          position occupied by that class.
    // cnt:     next free SA slot of each class during the bucket pass.
    std::vector<int> classes(n), c(s.begin(), s.end()), cnt(n);
    for (int len = 1; len < n; len <<= 1) {
      for (int i = 0; i < n; i++) {
        // Neighbours in SA share a class only if their previous ids match,
        // the earlier one still has `len` characters available, and the ids
        // len/2 further along match as well. The short-circuit order of these
        // tests guards the last two subscripts.
        if (i > 0 && c[SA[i - 1]] == c[SA[i]] && SA[i - 1] + len < n && c[SA[i - 1] + len / 2] == c[SA[i] + len / 2]) {
          classes[SA[i]] = classes[SA[i - 1]];
        } else {
          classes[SA[i]] = i;
        }
      }
      std::iota(std::begin(cnt), std::end(cnt), 0);
      // From here until the swap below, `c` temporarily holds the old order.
      std::copy(std::begin(SA), std::end(SA), std::begin(c));
      for (int i = 0; i < n; i++) {
        // Walk the old order and drop the suffix starting `len` earlier into
        // the next free slot of its own class.
        int s1 = c[i] - len;
        if (s1 >= 0)
          SA[cnt[classes[s1]]++] = s1;
      }
      classes.swap(c);
    }
  }

  /// Returns the start index of the k-th suffix in sorted order (no bounds check).
  int operator[](int k) const {
    return SA[k];
  }

  /// Returns the length of the indexed text.
  size_t size() const {
    return s.size();
  }

  /// Returns true when s[si..] is lexicographically smaller than t[ti..].
  ///
  /// A suffix that runs out before `t` does (a proper prefix of `t`) counts as
  /// smaller; a suffix that has `t` as a prefix does not.
  bool lt_substr(const std::string &t, int si = 0, int ti = 0) const {
    const int sn = static_cast<int>(s.size());
    const int tn = static_cast<int>(t.size());
    while (si < sn && ti < tn) {
      if (s[si] < t[ti])
        return true;
      if (s[si] > t[ti])
        return false;
      ++si, ++ti;
    }
    return si >= sn && ti < tn;
  }

  /// Returns the smallest k such that the k-th suffix is not smaller than `t`
  /// (SA.size() when every suffix is smaller).
  int lower_bound(const std::string &t) const {
    int low = -1, high = static_cast<int>(SA.size());
    // Invariant: suffix[low] < t <= suffix[high], with sentinels at both ends.
    while (high - low > 1) {
      const int mid = low + (high - low) / 2;
      if (lt_substr(t, SA[mid]))
        low = mid;
      else
        high = mid;
    }
    return high;
  }

  /// Returns the half-open range [first, second) of suffix-array positions
  /// whose suffixes start with `t`; the occurrences in the text are SA[k] for
  /// k in that range.
  ///
  /// The pattern is never modified. An empty pattern matches every suffix, and
  /// a pattern ending in the largest `char` value is handled like any other.
  std::pair<int, int> lower_upper_bound(const std::string &t) const {
    const int first = lower_bound(t);
    int low = first - 1, high = static_cast<int>(SA.size());
    // Everything from `first` on is >= t, so within this window the suffixes
    // that start with t come before those that are strictly greater.
    while (high - low > 1) {
      const int mid = low + (high - low) / 2;
      if (compare_to_prefix(SA[mid], t) <= 0)
        low = mid;
      else
        high = mid;
    }
    return {first, high};
  }

  /// Prints every suffix in sorted order to standard output as "rank: suffix".
  void output() const {
    const int n = static_cast<int>(size());
    for (int i = 0; i < n; i++) {
      std::cout << i << ": " << s.substr(SA[i]) << '\n';
    }
    std::cout << std::flush;
  }

 private:
  /// Three-way comparison of the suffix s[si..] against `t`, looking at no
  /// more than |t| characters: negative if the suffix sorts before `t`, zero
  /// if the suffix starts with `t`, positive if it sorts after.
  ///
  /// Hand-rolled instead of std::string::compare because that compares bytes
  /// as unsigned char, which may disagree with the plain `char` order used to
  /// build SA.
  int compare_to_prefix(int si, const std::string &t) const {
    const int sn = static_cast<int>(s.size());
    const int tn = static_cast<int>(t.size());
    int ti = 0;
    while (si < sn && ti < tn) {
      if (s[si] != t[ti])
        return s[si] < t[ti] ? -1 : 1;
      ++si, ++ti;
    }
    return ti < tn ? -1 : 0;
  }
};

Verify

//TODO

Licensed under CC BY-NC-ND 4.0
All rights reserved.
Built with Hugo
Theme Stack is designed by Jimmy