CLAW Library (a C++ Library Absolutely Wonderful) 1.5.5
Public Member Functions | Private Member Functions

claw::text::kmp< RandomIterator > Class Template Reference

Exact pattern finding with the Knuth-Morris-Pratt's algorithm. More...

#include <kmp.hpp>

List of all members.

Public Member Functions

template<class UnaryPredicate >
void operator() (const RandomIterator pattern_begin, const RandomIterator pattern_end, const RandomIterator text_begin, const RandomIterator text_end, UnaryPredicate &action) const
 Pattern matching with the Knuth-Morris-Pratt's algorithm.

Private Member Functions

unsigned int common_prefix_length (const RandomIterator begin_1, const RandomIterator begin_2, const RandomIterator end_1, const RandomIterator end_2) const
 Calculate the length of the longest common prefix between two patterns.
void z_boxes (const RandomIterator begin, const RandomIterator end, std::map< unsigned int, unsigned int > &out) const
 Preprocessing. Calculate the pattern's z-boxes.
void spi_prime (const RandomIterator begin, const RandomIterator end, std::map< unsigned int, unsigned int > &out) const
 Preprocessing. Calculate the pattern's spi' values.

Detailed Description

template<class RandomIterator>
class claw::text::kmp< RandomIterator >

Exact pattern finding with the Knuth-Morris-Pratt's algorithm.

Author:
Julien Jorge

Definition at line 44 of file kmp.hpp.


Member Function Documentation

template<class RandomIterator >
unsigned int claw::text::kmp< RandomIterator >::common_prefix_length ( const RandomIterator  begin_1,
const RandomIterator  begin_2,
const RandomIterator  end_1,
const RandomIterator  end_2 
) const [private]

Calculate the length of the longest common prefix between two patterns.

Parameters:
begin_1Iterator on the first item in the first pattern.
begin_2Iterator on the first item in the second pattern.
end_1Iterator after the last item in the first pattern.
end_2Iterator after the last item in the second pattern.

Definition at line 46 of file kmp.tpp.

{
  unsigned int count = 0;
  RandomIterator it_1 = begin_1, it_2 = begin_2;
  bool quit = false;

  while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
    if ( *it_1 == *it_2 )
      {
        ++it_1;
        ++it_2;
        ++count;
      }
    else
      quit = true;

  return count;
} // common_prefix_length()
template<class RandomIterator >
template<class UnaryPredicate >
void claw::text::kmp< RandomIterator >::operator() ( const RandomIterator  pattern_begin,
const RandomIterator  pattern_end,
const RandomIterator  text_begin,
const RandomIterator  text_end,
UnaryPredicate &  action 
) const

Pattern matching with the Knuth-Morris-Pratt's algorithm.

Parameters:
pattern_beginIterator on the first item in the pattern.
pattern_endIterator after the last item in the pattern.
text_beginIterator on the first item in the text.
text_endIterator after the last item in the text.
actionPredicate called with the last found position for the pattern.
Remarks:
Exits if action return false.
Precondition:
pattern_begin != pattern_end

Definition at line 174 of file kmp.tpp.

References claw::it_index< T >::set().

{
  std::map<unsigned int, unsigned int> spi; // pattern's spi'
  claw::it_index<RandomIterator> it_p(pattern_begin,1);
  claw::it_index<RandomIterator> it_t(text_begin,1);
  bool stop = false;    // result of the last call to the predicate action

  const int pattern_length = pattern_end - pattern_begin;
  const int text_length    = text_end    - text_begin;

  assert(pattern_begin != pattern_end);

  spi_prime(pattern_begin, pattern_end, spi);

  unsigned int i = 0;

  while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
    {
      unsigned int common;

      common = common_prefix_length(it_p, it_t, pattern_end, text_end);
      i += common;
      it_p += common;
      it_t += common;

      if (it_p == 1)
  ++it_t;
      else
        {
    if ( (int)it_p == pattern_length+1 )
      stop = !action( (int)it_t - pattern_length-1 );

    i = spi[i-1];
    it_p.set(pattern_begin+i, i+1);
        }
    }
} // operator()
template<class RandomIterator >
void claw::text::kmp< RandomIterator >::spi_prime ( const RandomIterator  begin,
const RandomIterator  end,
std::map< unsigned int, unsigned int > &  out 
) const [private]

Preprocessing. Calculate the pattern's spi' values.

Parameters:
beginIterator on the first item in the pattern.
endIterator after the last item in the pattern.
out(out) Calculated spi'.
Remarks:
out contains only non-zero spi'.

Definition at line 138 of file kmp.tpp.

{
  std::map<unsigned int, unsigned int> z;    // pattern's Z-boxes.
  unsigned int j;                            // loop index.

  // set Z-boxes
  z_boxes(begin, end, z);

  // calculates spi' (from end to begining)
  j=end-begin;

  do
    {
      --j;
      if (z.find(j) != z.end())
        out[j + z[j] - 1] = z[j];
    }
  while (j!=0);
} // spi_prime()
template<class RandomIterator >
void claw::text::kmp< RandomIterator >::z_boxes ( const RandomIterator  begin,
const RandomIterator  end,
std::map< unsigned int, unsigned int > &  out 
) const [private]

Preprocessing. Calculate the pattern's z-boxes.

Parameters:
beginIterator on the first item in the pattern.
endIterator after the last item in the pattern.
out(out) Calculated z-boxes.
Remarks:
out contains only non-zero z-boxes.

Definition at line 77 of file kmp.tpp.

{
  // right and left bounds of the current item's Z-box.
  claw::it_index<RandomIterator> it_r(begin);
  claw::it_index<RandomIterator> it_l(begin); 
  claw::it_index<RandomIterator> it_k(begin);      // increment on the items
  unsigned int z;                   // le Zi of the current position

  for (++it_k; it_k!=end; ++it_k)
    {
      if (it_k > it_r)
        {
          z = common_prefix_length(begin, it_k, end, end);
                  
          if ( z > 0 ) // set the Z-box
            {
              out[it_k] = z;
              it_l = it_k;
              it_r = it_k.operator+(z) - 1;
            }
        }
      else /* k <= r */
        {
          unsigned int k_bis = it_k - it_l;
    claw::it_index<RandomIterator> it_b(it_r - it_k);

          if ( out.find(k_bis) == out.end() )
            z = 0;
          else
            z = out[k_bis];

          if ( z <= (unsigned int)it_b )
            {
              if ( z > 0 )
                out[it_k] = z;
            }
          else
            {
              claw::it_index<RandomIterator> it_q = it_r + 1;
                          
              it_q += common_prefix_length(it_q, it_b+1, end, end);

              out[it_k] = it_q - it_k;
              it_r = it_q - 1;
              it_l = it_k;
            }
        }
    }
} // z_boxes()

The documentation for this class was generated from the following files: