00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include <map>
00031 #include <assert.h>
00032 #include <claw/it_index.hpp>
00033
00034
00043 template<class RandomIterator>
00044 unsigned int
00045 claw::text::kmp<RandomIterator>::
00046 common_prefix_length( const RandomIterator begin_1,
00047 const RandomIterator begin_2,
00048 const RandomIterator end_1, const RandomIterator end_2
00049 ) const
00050 {
00051 unsigned int count = 0;
00052 RandomIterator it_1 = begin_1, it_2 = begin_2;
00053 bool quit = false;
00054
00055 while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
00056 if ( *it_1 == *it_2 )
00057 {
00058 ++it_1;
00059 ++it_2;
00060 ++count;
00061 }
00062 else
00063 quit = true;
00064
00065 return count;
00066 }
00067
00068
00076 template<class RandomIterator>
00077 void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin,
00078 const RandomIterator end,
00079 std::map<unsigned int,unsigned int>& out) const
00080 {
00081
00082 claw::it_index<RandomIterator> it_r(begin);
00083 claw::it_index<RandomIterator> it_l(begin);
00084 claw::it_index<RandomIterator> it_k(begin);
00085 unsigned int z;
00086
00087 for (++it_k; it_k!=end; ++it_k)
00088 {
00089 if (it_k > it_r)
00090 {
00091 z = common_prefix_length(begin, it_k, end, end);
00092
00093 if ( z > 0 )
00094 {
00095 out[it_k] = z;
00096 it_l = it_k;
00097 it_r = it_k.operator+(z) - 1;
00098 }
00099 }
00100 else
00101 {
00102 unsigned int k_bis = it_k - it_l;
00103 claw::it_index<RandomIterator> it_b(it_r - it_k);
00104
00105 if ( out.find(k_bis) == out.end() )
00106 z = 0;
00107 else
00108 z = out[k_bis];
00109
00110 if ( z <= (unsigned int)it_b )
00111 {
00112 if ( z > 0 )
00113 out[it_k] = z;
00114 }
00115 else
00116 {
00117 claw::it_index<RandomIterator> it_q = it_r + 1;
00118
00119 it_q += common_prefix_length(it_q, it_b+1, end, end);
00120
00121 out[it_k] = it_q - it_k;
00122 it_r = it_q - 1;
00123 it_l = it_k;
00124 }
00125 }
00126 }
00127 }
00128
00129
00137 template<class RandomIterator>
00138 void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin,
00139 const RandomIterator end,
00140 std::map<unsigned int, unsigned int>& out) const
00141 {
00142 std::map<unsigned int, unsigned int> z;
00143 unsigned int j;
00144
00145
00146 z_boxes(begin, end, z);
00147
00148
00149 j=end-begin;
00150
00151 do
00152 {
00153 --j;
00154 if (z.find(j) != z.end())
00155 out[j + z[j] - 1] = z[j];
00156 }
00157 while (j!=0);
00158 }
00159
00160
00171 template<class RandomIterator>
00172 template<class UnaryPredicate>
00173 void claw::text::kmp<RandomIterator>::operator()
00174 (const RandomIterator pattern_begin, const RandomIterator pattern_end,
00175 const RandomIterator text_begin, const RandomIterator text_end,
00176 UnaryPredicate& action ) const
00177 {
00178 std::map<unsigned int, unsigned int> spi;
00179 claw::it_index<RandomIterator> it_p(pattern_begin,1);
00180 claw::it_index<RandomIterator> it_t(text_begin,1);
00181 bool stop = false;
00182
00183 const int pattern_length = pattern_end - pattern_begin;
00184 const int text_length = text_end - text_begin;
00185
00186 assert(pattern_begin != pattern_end);
00187
00188 spi_prime(pattern_begin, pattern_end, spi);
00189
00190 unsigned int i = 0;
00191
00192 while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
00193 {
00194 unsigned int common;
00195
00196 common = common_prefix_length(it_p, it_t, pattern_end, text_end);
00197 i += common;
00198 it_p += common;
00199 it_t += common;
00200
00201 if (it_p == 1)
00202 ++it_t;
00203 else
00204 {
00205 if ( (int)it_p == pattern_length+1 )
00206 stop = !action( (int)it_t - pattern_length-1 );
00207
00208 i = spi[i-1];
00209 it_p.set(pattern_begin+i, i+1);
00210 }
00211 }
00212 }