class Regexp::Lexer

A very thin wrapper around the scanner that breaks quantified literal runs, collects emitted tokens into an array, calculates their nesting depth, normalizes tokens for the parser, and checks whether they are implemented by the given syntax flavor.

Constants

CLOSING_TOKENS
OPENING_TOKENS

Attributes

conditional_nesting[RW]
nesting[RW]
set_nesting[RW]
shift[RW]
tokens[RW]

Public Class Methods

lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) click to toggle source
# File lib/regexp_parser/lexer.rb, line 14
# Class-level convenience entry point: builds a fresh lexer instance and
# runs its instance-level #lex with the same arguments.
#
# input  - the pattern to lex, handed through unchanged.
# syntax - syntax flavor name; defaults to the running Ruby's version.
# block  - optional block, forwarded to the instance method.
#
# Returns whatever the instance-level #lex returns.
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
  lexer = new
  lexer.lex(input, syntax, &block)
end
Also aliased as: scan
scan(input, syntax = "ruby/#{RUBY_VERSION}", &block)
Alias for: lex

Public Instance Methods

lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) click to toggle source
# File lib/regexp_parser/lexer.rb, line 18
# Lexes +input+ into an array of Regexp::Token objects.
#
# Every token reported by the scanner is normalized and checked against the
# syntax flavor, tagged with the current group / set / conditional depth, and
# linked to its neighbours through #previous and #next.
#
# input  - the pattern, passed straight to Regexp::Scanner.scan.
# syntax - flavor name given to Regexp::Syntax.new; defaults to the running
#          Ruby's version.
# block  - optional; when given, the result is the tokens mapped through it.
#
# Returns the collected tokens, or the block's results when a block is given.
def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
  syntax = Regexp::Syntax.new(syntax)

  self.tokens = []
  self.nesting = 0
  self.set_nesting = 0
  self.conditional_nesting = 0
  self.shift = 0

  last = nil
  Regexp::Scanner.scan(input) do |type, token, text, ts, te|
    type, token = *syntax.normalize(type, token)
    syntax.check! type, token

    # Depth is lowered before a closing token is built, so it is recorded at
    # the same level as its opener.
    ascend(type, token)

    if type == :quantifier && last
      break_literal(last)        if last.type == :literal
      break_codepoint_list(last) if last.token == :codepoint_list

      # A split replaces the trailing token with two fresh, unlinked ones.
      # Re-link the tail (token before the split, first half, second half)
      # and continue from the real last token instead of the discarded one.
      unless tokens.last.equal?(last)
        tokens.last(3).each_cons(2) do |left, right|
          left.next = right
          right.previous = left
        end
        last = tokens.last
      end
    end

    # `shift` compensates for characters added by earlier codepoint-list splits.
    current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
                                nesting, set_nesting, conditional_nesting)

    if type == :conditional && [:condition, :condition_close].include?(token)
      current = merge_condition(current)
      # merge_condition removed the previous token from `tokens`; link the
      # merged token to whatever now precedes it (possibly nothing).
      last = tokens.last
    end

    if last
      last.next = current
      current.previous = last
    end

    tokens << current
    last = current

    # Depth is raised after an opening token is built, so only the tokens
    # that follow it are one level deeper.
    descend(type, token)
  end

  if block_given?
    tokens.map { |t| block.call(t) }
  else
    tokens
  end
end

Private Instance Methods

ascend(type, token) click to toggle source
# File lib/regexp_parser/lexer.rb, line 69
# Lowers the depth counter that matches a closing token.
#
# #lex calls this before it builds the token, so a closing token is recorded
# at the already-reduced depth.
#
# type  - the token's type (:group, :assertion, :set, :conditional, ...).
# token - the token's name; only closing names change a counter.
def ascend(type, token)
  if [:group, :assertion].include?(type)
    self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
  elsif type == :set
    self.set_nesting = set_nesting - 1 if token == :close
  elsif type == :conditional
    self.conditional_nesting = conditional_nesting - 1 if token == :close
  end
end
break_codepoint_list(token) click to toggle source
# File lib/regexp_parser/lexer.rb, line 106
# Splits the last entry off a multi-entry codepoint list so that a following
# quantifier applies to that entry only.
#
# The text is cut at its final space: everything before it is closed with
# '}', and the remainder is re-opened with '\u{'. Does nothing when the text
# contains no space.
#
# token - the :codepoint_list token currently at the end of `tokens`.
def break_codepoint_list(token)
  head, _space, final = token.text.rpartition(' ')
  return if head.empty?

  depths = [nesting, set_nesting, conditional_nesting]

  leading = Regexp::Token.new(:escape, :codepoint_list, head + '}',
                              token.ts, token.te - final.length, *depths)
  trailing = Regexp::Token.new(:escape, :codepoint_list, '\u{' + final,
                               token.ts + head.length + 1, token.te + 3, *depths)

  tokens.pop
  tokens.push(leading, trailing)

  # The two pieces are three characters longer than the original text (the
  # space is dropped; '}', '\', 'u' and '{' are added), so later offsets move.
  self.shift = shift + 3
end
break_literal(token) click to toggle source

Called from the scanner callback in lex to break a literal run that is longer than one character into two separate tokens when it is followed by a quantifier.

# File lib/regexp_parser/lexer.rb, line 93
# Splits the final character off a literal run so that a following
# quantifier applies to that character only.
#
# Does nothing when the run is a single character (or empty). Offsets are
# adjusted by byte size, so a multibyte final character is handled.
#
# token - the :literal token currently at the end of `tokens`.
def break_literal(token)
  head, final_char, _rest = token.text.partition(/.\z/mu)
  return if head.empty?

  depths = [nesting, set_nesting, conditional_nesting]

  leading = Regexp::Token.new(:literal, :literal, head,
                              token.ts, token.te - final_char.bytesize, *depths)
  trailing = Regexp::Token.new(:literal, :literal, final_char,
                               token.ts + head.bytesize, token.te, *depths)

  tokens.pop
  tokens.push(leading, trailing)
end
descend(type, token) click to toggle source
# File lib/regexp_parser/lexer.rb, line 80
# Raises the depth counter that matches an opening token.
#
# #lex calls this after it has built and stored the token, so an opening
# token is recorded at the depth in effect before the increase.
#
# type  - the token's type (:group, :assertion, :set, :conditional, ...).
# token - the token's name; only opening names change a counter.
def descend(type, token)
  if [:group, :assertion].include?(type)
    self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
  elsif type == :set
    self.set_nesting = set_nesting + 1 if token == :open
  elsif type == :conditional
    self.conditional_nesting = conditional_nesting + 1 if token == :open
  end
end
merge_condition(current) click to toggle source
# File lib/regexp_parser/lexer.rb, line 121
# Folds the most recently collected token and +current+ into one
# :condition token.
#
# The previous token is removed from `tokens`. The result carries both texts
# joined, starts where the removed token started and ends where +current+
# ends. It is returned, not pushed.
#
# current - the conditional token that has just been built.
def merge_condition(current)
  preceding = tokens.pop
  combined_text = preceding.text + current.text

  Regexp::Token.new(:conditional, :condition, combined_text,
                    preceding.ts, current.te,
                    nesting, set_nesting, conditional_nesting)
end