# Scans the remaining input and appends tokens to +tokens+.
#
# The scanner is a small state machine:
#
# * +:initial+ - ordinary code (whitespace, comments, identifiers,
#   operators, numbers, string/regexp openers, annotations).
# * +:string+, +:multiline_string+, +:regexp+ - inside a quoted literal whose
#   delimiter is remembered in +string_delimiter+.
#
# Tokens are two-element arrays: either <tt>[text, kind]</tt> or the group
# markers <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt>. Triple-quoted
# strings are tracked internally as +:multiline_string+ but are always
# opened *and closed* as a +:string+ group so that group markers stay paired.
#
# <tt>${ ... }</tt> inside a literal switches back to +:initial+; the
# interrupted literal's state is pushed on +inline_block_stack+ and restored
# when the matching <tt>}</tt> is reached. Any literal or interpolation still
# open at end of input is closed so the token stream is balanced.
#
# tokens::  token container supporting <tt><<</tt>; it is returned.
# options:: accepted for interface compatibility; not read by this method.
#
# Relies on the scanner methods of the receiver (+scan+, +check+, +eos?+,
# +bol?+, +getch+, +peek+, +matched+) and on constants defined elsewhere
# (IDENT, IDENT_KIND, KEYWORDS_EXPECTING_VALUE, STRING_CONTENT_PATTERN,
# ESCAPE, UNICODE_ESCAPE, REGEXP_ESCAPE).
def scan_tokens(tokens, options)
  state = :initial
  inline_block_stack = []
  inline_block_paren_depth = nil
  string_delimiter = nil
  import_clause = class_name_follows = last_token = after_def = false
  value_expected = true

  # Triple-quoted strings are opened as a :string group, so every close
  # marker must use :string as well.
  group_kind = lambda { |s| s == :multiline_string ? :string : s }

  until eos?

    kind = nil
    match = nil

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        tokens << [match, :space]
        if match.index ?\n
          # A line break ends an import clause / def header and puts us
          # back into a position where a value (e.g. a regexp) may start.
          import_clause = after_def = false
          value_expected ||= true
        end
        next

      elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        after_def = false
        kind = :comment

      elsif bol? && scan(/ \#!.* /x)
        kind = :doctype

      elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
        after_def = value_expected = false
        kind = :include

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        if last_token == '.'
          # Anything after a dot is a member name, even if it is a keyword.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        elsif after_def && check(/\s*[({]/)
          kind = :method
          after_def = false
        elsif kind == :ident && last_token != '?' && check(/:/)
          # "name:" is a key unless the colon belongs to a ternary.
          kind = :key
        else
          class_name_follows = true if match == 'class' || (import_clause && match == 'as')
          import_clause = match == 'import'
          after_def = true if match == 'def'
        end

      elsif scan(/;/)
        import_clause = after_def = false
        value_expected = true
        kind = :operator

      elsif scan(/\{/)
        class_name_follows = after_def = false
        value_expected = true
        kind = :operator
        # Track brace nesting only while inside a ${ ... } interpolation.
        inline_block_paren_depth += 1 unless inline_block_stack.empty?

      elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
                           && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
        value_expected = true
        value_expected = :regexp if match == '~'
        after_def = false
        kind = :operator

      elsif match = scan(/ [)\]}] /x)
        value_expected = after_def = false
        if !inline_block_stack.empty? && match == '}'
          inline_block_paren_depth -= 1
          if inline_block_paren_depth == 0
            # This brace ends the interpolation: resume the interrupted literal.
            tokens << [match, :inline_delimiter]
            tokens << [:close, :inline]
            state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
            next
          end
        end
        kind = :operator

      elsif check(/[\d.]/)
        after_def = value_expected = false
        if scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex
        elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
          kind = :oct
        elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          kind = :float
        elsif scan(/\d+[lLgG]?/)
          kind = :integer
        end

      elsif match = scan(/'''|"""/)
        after_def = value_expected = false
        state = :multiline_string
        tokens << [:open, :string]
        string_delimiter = match
        kind = :delimiter

      elsif match = scan(/["']/)
        after_def = value_expected = false
        # Only quotes can match here; slashy regexps are opened below.
        state = :string
        tokens << [:open, state]
        string_delimiter = match
        kind = :delimiter

      elsif value_expected && (match = scan(/\//))
        after_def = value_expected = false
        tokens << [:open, :regexp]
        state = :regexp
        string_delimiter = '/'
        kind = :delimiter

      elsif scan(/ @ #{IDENT} /ox)
        after_def = value_expected = false
        kind = :annotation

      elsif scan(/\//)
        # A slash where no value is expected is the division operator.
        after_def = false
        value_expected = true
        kind = :operator

      else
        getch
        kind = :error

      end

    when :string, :regexp, :multiline_string
      if scan(STRING_CONTENT_PATTERN[string_delimiter])
        kind = :content

      elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
        tokens << [match, :delimiter]
        if state == :regexp
          modifiers = scan(/[ix]+/)
          tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
        end
        tokens << [:close, group_kind.call(state)]
        string_delimiter = nil
        after_def = value_expected = false
        state = :initial
        next

      elsif (state == :string || state == :multiline_string) &&
          (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # In single-quoted literals only \\ and \' are real escapes.
        if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
          kind = :content
        else
          kind = :char
        end

      elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        kind = :char

      elsif match = scan(/ \$ #{IDENT} /mox)
        # $name interpolation: a complete inline group in one step.
        tokens << [:open, :inline]
        tokens << ['$', :inline_delimiter]
        match = match[1..-1]
        tokens << [match, IDENT_KIND[match]]
        tokens << [:close, :inline]
        next

      elsif match = scan(/ \$ \{ /x)
        # ${ ... } interpolation: remember the literal and scan code.
        tokens << [:open, :inline]
        tokens << ['${', :inline_delimiter]
        inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
        inline_block_paren_depth = 1
        state = :initial
        next

      elsif scan(/ \$ /mx)
        kind = :content

      elsif scan(/ \\. /mx)
        kind = :content

      elsif scan(/ \\ | \n /x)
        # Stray backslash or line break: abort the literal. Close with the
        # kind that was opened (:string for triple-quoted strings).
        tokens << [:close, group_kind.call(state)]
        kind = :error
        after_def = value_expected = false
        state = :initial

      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
      end

    else
      raise_inspect 'Unknown state', tokens

    end

    match ||= matched
    if $CODERAY_DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens unless match

    # Whitespace and comments must not hide the previous significant token.
    last_token = match unless [:space, :comment, :doctype].include? kind

    tokens << [match, kind]

  end

  # Input ended inside a literal: close its group.
  if [:multiline_string, :string, :regexp].include? state
    tokens << [:close, group_kind.call(state)]
  end

  # Input ended inside one or more ${ ... } interpolations: close each
  # inline group and the literal it interrupted, innermost first.
  until inline_block_stack.empty?
    outer_state, = inline_block_stack.pop
    tokens << [:close, :inline]
    tokens << [:close, group_kind.call(outer_state)]
  end

  tokens
end