regexp.cc source code [flutter_engine/third_party/dart/runtime/vm/regexp.cc]

1	// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2	// for details. All rights reserved. Use of this source code is governed by a
3	// BSD-style license that can be found in the LICENSE file.
4
5	#include "vm/regexp.h"
6
7	#include <memory>
8
9	#include "platform/splay-tree-inl.h"
10	#include "platform/unicode.h"
11
12	#include "unicode/uniset.h"
13
14	#include "vm/dart_entry.h"
15	#include "vm/regexp_assembler.h"
16	#include "vm/regexp_assembler_bytecode.h"
17	#include "vm/regexp_ast.h"
18	#include "vm/symbols.h"
19	#include "vm/thread.h"
20	#include "vm/unibrow-inl.h"
21
22	#if !defined(DART_PRECOMPILED_RUNTIME)
23	#include "vm/regexp_assembler_ir.h"
24	#endif // !defined(DART_PRECOMPILED_RUNTIME)
25
26	#define Z (zone())
27
28	namespace dart {
29
30	// Default to generating optimized regexp code.
31	static constexpr bool kRegexpOptimization = true;
32
33	// More makes code generation slower, less makes V8 benchmark score lower.
34	static constexpr intptr_t kMaxLookaheadForBoyerMoore = `8`;
35
36	ContainedInLattice AddRange(ContainedInLattice containment,
37	const int32_t* ranges,
38	intptr_t ranges_length,
39	Interval new_range) {
40	ASSERT((ranges_length & `1`) == `1`);
41	ASSERT(ranges[ranges_length - `1`] == Utf::kMaxCodePoint + `1`);
42	if (containment == kLatticeUnknown) return containment;
43	bool inside = false;
44	int32_t last = `0`;
45	for (intptr_t i = `0`; i < ranges_length;
46	inside = !inside, last = ranges[i], i++) {
47	// Consider the range from last to ranges[i].
48	// We haven't got to the new range yet.
49	if (ranges[i] <= new_range.from()) continue;
50	// New range is wholly inside last-ranges[i]. Note that new_range.to() is
51	// inclusive, but the values in ranges are not.
52	if (last <= new_range.from() && new_range.to() < ranges[i]) {
53	return Combine(a: containment, b: inside ? kLatticeIn : kLatticeOut);
54	}
55	return kLatticeUnknown;
56	}
57	return containment;
58	}
59
60	// -------------------------------------------------------------------
61	// Implementation of the Irregexp regular expression engine.
62	//
63	// The Irregexp regular expression engine is intended to be a complete
64	// implementation of ECMAScript regular expressions. It generates
65	// IR code that is subsequently compiled to native code.
66
67	// The Irregexp regexp engine is structured in three steps.
68	// 1) The parser generates an abstract syntax tree. See regexp_ast.cc.
69	// 2) From the AST a node network is created. The nodes are all
70	// subclasses of RegExpNode. The nodes represent states when
71	// executing a regular expression. Several optimizations are
72	// performed on the node network.
73	// 3) From the nodes we generate IR instructions that can actually
74	// execute the regular expression (perform the search). The
75	// code generation step is described in more detail below.
76
77	// Code generation.
78	//
79	// The nodes are divided into four main categories.
80	// Choice nodes*
81	// These represent places where the regular expression can
82	// match in more than one way. For example on entry to an
83	// alternation (foo\|bar) or a repetition (, +, ? or {}).*
84	// Action nodes*
85	// These represent places where some action should be
86	// performed. Examples include recording the current position
87	// in the input string to a register (in order to implement
88	// captures) or other actions on register for example in order
89	// to implement the counters needed for {} repetitions.
90	// Matching nodes*
91	// These attempt to match some element part of the input string.
92	// Examples of elements include character classes, plain strings
93	// or back references.
94	// End nodes*
95	// These are used to implement the actions required on finding
96	// a successful match or failing to find a match.
97	//
98	// The code generated maintains some state as it runs. This consists of the
99	// following elements:
100	//
101	// The capture registers. Used for string captures.*
102	// Other registers. Used for counters etc.*
103	// The current position.*
104	// The stack of backtracking information. Used when a matching node*
105	// fails to find a match and needs to try an alternative.
106	//
107	// Conceptual regular expression execution model:
108	//
109	// There is a simple conceptual model of regular expression execution
110	// which will be presented first. The actual code generated is a more
111	// efficient simulation of the simple conceptual model:
112	//
113	// Choice nodes are implemented as follows:*
114	// For each choice except the last {
115	// push current position
116	// push backtrack code location
117	// <generate code to test for choice>
118	// backtrack code location:
119	// pop current position
120	// }
121	// <generate code to test for last choice>
122	//
123	// Actions nodes are generated as follows*
124	// <push affected registers on backtrack stack>
125	// <generate code to perform action>
126	// push backtrack code location
127	// <generate code to test for following nodes>
128	// backtrack code location:
129	// <pop affected registers to restore their state>
130	// <pop backtrack location from stack and go to it>
131	//
132	// Matching nodes are generated as follows:*
133	// if input string matches at current position
134	// update current position
135	// <generate code to test for following nodes>
136	// else
137	// <pop backtrack location from stack and go to it>
138	//
139	// Thus it can be seen that the current position is saved and restored
140	// by the choice nodes, whereas the registers are saved and restored by
141	// by the action nodes that manipulate them.
142	//
143	// The other interesting aspect of this model is that nodes are generated
144	// at the point where they are needed by a recursive call to Emit(). If
145	// the node has already been code generated then the Emit() call will
146	// generate a jump to the previously generated code instead. In order to
147	// limit recursion it is possible for the Emit() function to put the node
148	// on a work list for later generation and instead generate a jump. The
149	// destination of the jump is resolved later when the code is generated.
150	//
151	// Actual regular expression code generation.
152	//
153	// Code generation is actually more complicated than the above. In order
154	// to improve the efficiency of the generated code some optimizations are
155	// performed
156	//
157	// Choice nodes have 1-character lookahead.*
158	// A choice node looks at the following character and eliminates some of
159	// the choices immediately based on that character. This is not yet
160	// implemented.
161	// Simple greedy loops store reduced backtracking information.*
162	// A quantifier like /.foo/m will greedily match the whole input. It will*
163	// then need to backtrack to a point where it can match "foo". The naive
164	// implementation of this would push each character position onto the
165	// backtracking stack, then pop them off one by one. This would use space
166	// proportional to the length of the input string. However since the "."
167	// can only match in one way and always has a constant length (in this case
168	// of 1) it suffices to store the current position on the top of the stack
169	// once. Matching now becomes merely incrementing the current position and
170	// backtracking becomes decrementing the current position and checking the
171	// result against the stored current position. This is faster and saves
172	// space.
173	// The current state is virtualized.*
174	// This is used to defer expensive operations until it is clear that they
175	// are needed and to generate code for a node more than once, allowing
176	// specialized an efficient versions of the code to be created. This is
177	// explained in the section below.
178	//
179	// Execution state virtualization.
180	//
181	// Instead of emitting code, nodes that manipulate the state can record their
182	// manipulation in an object called the Trace. The Trace object can record a
183	// current position offset, an optional backtrack code location on the top of
184	// the virtualized backtrack stack and some register changes. When a node is
185	// to be emitted it can flush the Trace or update it. Flushing the Trace
186	// will emit code to bring the actual state into line with the virtual state.
187	// Avoiding flushing the state can postpone some work (e.g. updates of capture
188	// registers). Postponing work can save time when executing the regular
189	// expression since it may be found that the work never has to be done as a
190	// failure to match can occur. In addition it is much faster to jump to a
191	// known backtrack code location than it is to pop an unknown backtrack
192	// location from the stack and jump there.
193	//
194	// The virtual state found in the Trace affects code generation. For example
195	// the virtual state contains the difference between the actual current
196	// position and the virtual current position, and matching code needs to use
197	// this offset to attempt a match in the correct location of the input
198	// string. Therefore code generated for a non-trivial trace is specialized
199	// to that trace. The code generator therefore has the ability to generate
200	// code for each node several times. In order to limit the size of the
201	// generated code there is an arbitrary limit on how many specialized sets of
202	// code may be generated for a given node. If the limit is reached, the
203	// trace is flushed and a generic version of the code for a node is emitted.
204	// This is subsequently used for that node. The code emitted for non-generic
205	// trace is not recorded in the node and so it cannot currently be reused in
206	// the event that code generation is requested for an identical trace.
207
208	void RegExpTree::AppendToText(RegExpText* text) {
209	UNREACHABLE();
210	}
211
212	void RegExpAtom::AppendToText(RegExpText* text) {
213	text->AddElement(elm: TextElement::Atom(atom: this));
214	}
215
216	void RegExpCharacterClass::AppendToText(RegExpText* text) {
217	text->AddElement(elm: TextElement::CharClass(char_class: this));
218	}
219
220	void RegExpText::AppendToText(RegExpText* text) {
221	for (intptr_t i = `0`; i < elements()->length(); i++)
222	text->AddElement(elm: (*elements())[i]);
223	}
224
225	TextElement TextElement::Atom(RegExpAtom* atom) {
226	return TextElement (ATOM, atom);
227	}
228
229	TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
230	return TextElement (CHAR_CLASS, char_class);
231	}
232
233	intptr_t TextElement::length() const {
234	switch (text_type()) {
235	case ATOM:
236	return atom()->length();
237
238	case CHAR_CLASS:
239	return `1`;
240	}
241	UNREACHABLE();
242	return `0`;
243	}
244
245	class FrequencyCollator : public ValueObject {
246	public:
247	FrequencyCollator() : total_samples_(`0`) {
248	for (intptr_t i = `0`; i < RegExpMacroAssembler::kTableSize; i++) {
249	frequencies_[i] = CharacterFrequency (i);
250	}
251	}
252
253	void CountCharacter(intptr_t character) {
254	intptr_t index = (character & RegExpMacroAssembler::kTableMask);
255	frequencies_[index].Increment();
256	total_samples_++;
257	}
258
259	// Does not measure in percent, but rather per-128 (the table size from the
260	// regexp macro assembler).
261	intptr_t Frequency(intptr_t in_character) {
262	ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
263	if (total_samples_ < `1`) return `1`; // Division by zero.
264	intptr_t freq_in_per128 =
265	(frequencies_[in_character].counter() * `128`) / total_samples_;
266	return freq_in_per128;
267	}
268
269	private:
270	class CharacterFrequency {
271	public:
272	CharacterFrequency() : counter_(`0`), character_(-`1`) {}
273	explicit CharacterFrequency(intptr_t character)
274	: counter_(`0`), character_(character) {}
275
276	void Increment() { counter_++; }
277	intptr_t counter() { return counter_; }
278	intptr_t character() { return character_; }
279
280	private:
281	intptr_t counter_;
282	intptr_t character_;
283
284	DISALLOW_ALLOCATION();
285	};
286
287	private:
288	CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
289	intptr_t total_samples_;
290	};
291
292	class RegExpCompiler : public ValueObject {
293	public:
294	RegExpCompiler(intptr_t capture_count, bool is_one_byte);
295
296	intptr_t AllocateRegister() { return next_register_++; }
297
298	// Lookarounds to match lone surrogates for unicode character class matches
299	// are never nested. We can therefore reuse registers.
300	intptr_t UnicodeLookaroundStackRegister() {
301	if (unicode_lookaround_stack_register_ == kNoRegister) {
302	unicode_lookaround_stack_register_ = AllocateRegister();
303	}
304	return unicode_lookaround_stack_register_;
305	}
306
307	intptr_t UnicodeLookaroundPositionRegister() {
308	if (unicode_lookaround_position_register_ == kNoRegister) {
309	unicode_lookaround_position_register_ = AllocateRegister();
310	}
311	return unicode_lookaround_position_register_;
312	}
313
314	#if !defined(DART_PRECOMPILED_RUNTIME)
315	RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,
316	RegExpNode* start,
317	intptr_t capture_count,
318	const String& pattern);
319	#endif
320
321	RegExpEngine::CompilationResult Assemble(
322	BytecodeRegExpMacroAssembler* assembler,
323	RegExpNode* start,
324	intptr_t capture_count,
325	const String& pattern);
326
327	inline void AddWork(RegExpNode* node) { work_list_->Add(value: node); }
328
329	static constexpr intptr_t kImplementationOffset = `0`;
330	static constexpr intptr_t kNumberOfRegistersOffset = `0`;
331	static constexpr intptr_t kCodeOffset = `1`;
332
333	RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
334	EndNode* accept() { return accept_; }
335
336	static constexpr intptr_t kMaxRecursion = `100`;
337	inline intptr_t recursion_depth() { return recursion_depth_; }
338	inline void IncrementRecursionDepth() { recursion_depth_++; }
339	inline void DecrementRecursionDepth() { recursion_depth_--; }
340
341	void SetRegExpTooBig() { reg_exp_too_big_ = true; }
342
343	inline bool one_byte() const { return is_one_byte_; }
344	bool read_backward() { return read_backward_; }
345	void set_read_backward(bool value) { read_backward_ = value; }
346	FrequencyCollator* frequency_collator() { return &frequency_collator_; }
347
348	intptr_t current_expansion_factor() { return current_expansion_factor_; }
349	void set_current_expansion_factor(intptr_t value) {
350	current_expansion_factor_ = value;
351	}
352
353	Zone* zone() const { return zone_; }
354
355	static constexpr intptr_t kNoRegister = -`1`;
356
357	private:
358	EndNode* accept_;
359	intptr_t next_register_;
360	intptr_t unicode_lookaround_stack_register_;
361	intptr_t unicode_lookaround_position_register_;
362	ZoneGrowableArray<RegExpNode> work_list_;
363	intptr_t recursion_depth_;
364	RegExpMacroAssembler* macro_assembler_;
365	bool is_one_byte_;
366	bool reg_exp_too_big_;
367	bool read_backward_;
368	intptr_t current_expansion_factor_;
369	FrequencyCollator frequency_collator_;
370	Zone* zone_;
371	};
372
373	class RecursionCheck : public ValueObject {
374	public:
375	explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
376	compiler->IncrementRecursionDepth();
377	}
378	~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
379
380	private:
381	RegExpCompiler* compiler_;
382	};
383
384	static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
385	return RegExpEngine::CompilationResult ("RegExp too big");
386	}
387
388	// Attempts to compile the regexp using an Irregexp code generator. Returns
389	// a fixed array or a null handle depending on whether it succeeded.
390	RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool is_one_byte)
391	: next_register_(`2` * (capture_count + `1`)),
392	unicode_lookaround_stack_register_(kNoRegister),
393	unicode_lookaround_position_register_(kNoRegister),
394	work_list_(nullptr),
395	recursion_depth_(`0`),
396	is_one_byte_(is_one_byte),
397	reg_exp_too_big_(false),
398	read_backward_(false),
399	current_expansion_factor_(`1`),
400	zone_(Thread::Current()->zone()) {
401	accept_ = new (Z) EndNode (EndNode::ACCEPT, Z);
402	}
403
404	#if !defined(DART_PRECOMPILED_RUNTIME)
405	RegExpEngine::CompilationResult RegExpCompiler::Assemble(
406	IRRegExpMacroAssembler* macro_assembler,
407	RegExpNode* start,
408	intptr_t capture_count,
409	const String& pattern) {
410	macro_assembler->set_slow_safe(false / use_slow_safe_regexp_compiler /);
411	macro_assembler_ = macro_assembler;
412
413	ZoneGrowableArray<RegExpNode*> work_list(`0`);
414	work_list_ = &work_list;
415	BlockLabel fail;
416	macro_assembler_->PushBacktrack(label: &fail);
417	Trace new_trace;
418	start->Emit(compiler: this, trace: &new_trace);
419	macro_assembler_->BindBlock(label: &fail);
420	macro_assembler_->Fail();
421	while (!work_list.is_empty()) {
422	work_list.RemoveLast()->Emit(compiler: this, trace: &new_trace);
423	}
424	if (reg_exp_too_big_) return IrregexpRegExpTooBig();
425
426	macro_assembler->GenerateBacktrackBlock();
427	macro_assembler->FinalizeRegistersArray();
428
429	return RegExpEngine::CompilationResult (
430	macro_assembler->backtrack_goto(), macro_assembler->graph_entry(),
431	macro_assembler->num_blocks(), macro_assembler->num_stack_locals(),
432	next_register_);
433	}
434	#endif
435
436	RegExpEngine::CompilationResult RegExpCompiler::Assemble(
437	BytecodeRegExpMacroAssembler* macro_assembler,
438	RegExpNode* start,
439	intptr_t capture_count,
440	const String& pattern) {
441	macro_assembler->set_slow_safe(false / use_slow_safe_regexp_compiler /);
442	macro_assembler_ = macro_assembler;
443
444	ZoneGrowableArray<RegExpNode*> work_list(`0`);
445	work_list_ = &work_list;
446	BlockLabel fail;
447	macro_assembler_->PushBacktrack(label: &fail);
448	Trace new_trace;
449	start->Emit(compiler: this, trace: &new_trace);
450	macro_assembler_->BindBlock(label: &fail);
451	macro_assembler_->Fail();
452	while (!work_list.is_empty()) {
453	work_list.RemoveLast()->Emit(compiler: this, trace: &new_trace);
454	}
455	if (reg_exp_too_big_) return IrregexpRegExpTooBig();
456
457	TypedData& bytecode = TypedData::ZoneHandle(ptr: macro_assembler->GetBytecode());
458	return RegExpEngine::CompilationResult (&bytecode, next_register_);
459	}
460
461	bool Trace::DeferredAction::Mentions(intptr_t that) {
462	if (action_type() == ActionNode::CLEAR_CAPTURES) {
463	Interval range = static_cast<DeferredClearCaptures>(this*)->range();
464	return range.Contains(value: that);
465	} else {
466	return reg() == that;
467	}
468	}
469
470	bool Trace::mentions_reg(intptr_t reg) {
471	for (DeferredAction* action = actions_; action != nullptr;
472	action = action->next()) {
473	if (action->Mentions(that: reg)) return true;
474	}
475	return false;
476	}
477
478	bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) {
479	ASSERT(*cp_offset == `0`);
480	for (DeferredAction* action = actions_; action != nullptr;
481	action = action->next()) {
482	if (action->Mentions(that: reg)) {
483	if (action->action_type() == ActionNode::STORE_POSITION) {
484	cp_offset = static_cast<DeferredCapture>(action)->cp_offset();
485	return true;
486	} else {
487	return false;
488	}
489	}
490	}
491	return false;
492	}
493
494	// This is called as we come into a loop choice node and some other tricky
495	// nodes. It normalizes the state of the code generator to ensure we can
496	// generate generic code.
497	intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
498	intptr_t max_register = RegExpCompiler::kNoRegister;
499	for (DeferredAction* action = actions_; action != nullptr;
500	action = action->next()) {
501	if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
502	Interval range = static_cast<DeferredClearCaptures*>(action)->range();
503	for (intptr_t i = range.from(); i <= range.to(); i++)
504	affected_registers->Set(value: i, zone);
505	if (range.to() > max_register) max_register = range.to();
506	} else {
507	affected_registers->Set(value: action->reg(), zone);
508	if (action->reg() > max_register) max_register = action->reg();
509	}
510	}
511	return max_register;
512	}
513
514	void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
515	intptr_t max_register,
516	const OutSet& registers_to_pop,
517	const OutSet& registers_to_clear) {
518	for (intptr_t reg = max_register; reg >= `0`; reg--) {
519	if (registers_to_pop.Get(value: reg)) {
520	assembler->PopRegister(register_index: reg);
521	} else if (registers_to_clear.Get(value: reg)) {
522	intptr_t clear_to = reg;
523	while (reg > `0` && registers_to_clear.Get(value: reg - `1`)) {
524	reg--;
525	}
526	assembler->ClearRegisters(reg_from: reg, reg_to: clear_to);
527	}
528	}
529	}
530
531	void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
532	intptr_t max_register,
533	const OutSet& affected_registers,
534	OutSet* registers_to_pop,
535	OutSet* registers_to_clear,
536	Zone* zone) {
537	for (intptr_t reg = `0`; reg <= max_register; reg++) {
538	if (!affected_registers.Get(value: reg)) {
539	continue;
540	}
541
542	// The chronologically first deferred action in the trace
543	// is used to infer the action needed to restore a register
544	// to its previous state (or not, if it's safe to ignore it).
545	enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR };
546	DeferredActionUndoType undo_action = ACTION_IGNORE;
547
548	intptr_t value = `0`;
549	bool absolute = false;
550	bool clear = false;
551	const intptr_t kNoStore = kMinInt32;
552	intptr_t store_position = kNoStore;
553	// This is a little tricky because we are scanning the actions in reverse
554	// historical order (newest first).
555	for (DeferredAction* action = actions_; action != nullptr;
556	action = action->next()) {
557	if (action->Mentions(that: reg)) {
558	switch (action->action_type()) {
559	case ActionNode::SET_REGISTER: {
560	Trace::DeferredSetRegister* psr =
561	static_cast<Trace::DeferredSetRegister*>(action);
562	if (!absolute) {
563	value += psr->value();
564	absolute = true;
565	}
566	// SET_REGISTER is currently only used for newly introduced loop
567	// counters. They can have a significant previous value if they
568	// occur in a loop. TODO(lrn): Propagate this information, so we
569	// can set undo_action to ACTION_IGNORE if we know there is no
570	// value to restore.
571	undo_action = ACTION_RESTORE;
572	ASSERT(store_position == kNoStore);
573	ASSERT(!clear);
574	break;
575	}
576	case ActionNode::INCREMENT_REGISTER:
577	if (!absolute) {
578	value++;
579	}
580	ASSERT(store_position == kNoStore);
581	ASSERT(!clear);
582	undo_action = ACTION_RESTORE;
583	break;
584	case ActionNode::STORE_POSITION: {
585	Trace::DeferredCapture* pc =
586	static_cast<Trace::DeferredCapture*>(action);
587	if (!clear && store_position == kNoStore) {
588	store_position = pc->cp_offset();
589	}
590
591	// For captures we know that stores and clears alternate.
592	// Other register, are never cleared, and if the occur
593	// inside a loop, they might be assigned more than once.
594	if (reg <= `1`) {
595	// Registers zero and one, aka "capture zero", is
596	// always set correctly if we succeed. There is no
597	// need to undo a setting on backtrack, because we
598	// will set it again or fail.
599	undo_action = ACTION_IGNORE;
600	} else {
601	undo_action = pc->is_capture() ? ACTION_CLEAR : ACTION_RESTORE;
602	}
603	ASSERT(!absolute);
604	ASSERT(value == `0`);
605	break;
606	}
607	case ActionNode::CLEAR_CAPTURES: {
608	// Since we're scanning in reverse order, if we've already
609	// set the position we have to ignore historically earlier
610	// clearing operations.
611	if (store_position == kNoStore) {
612	clear = true;
613	}
614	undo_action = ACTION_RESTORE;
615	ASSERT(!absolute);
616	ASSERT(value == `0`);
617	break;
618	}
619	default:
620	UNREACHABLE();
621	break;
622	}
623	}
624	}
625	// Prepare for the undo-action (e.g., push if it's going to be popped).
626	if (undo_action == ACTION_RESTORE) {
627	assembler->PushRegister(register_index: reg);
628	registers_to_pop->Set(value: reg, zone);
629	} else if (undo_action == ACTION_CLEAR) {
630	registers_to_clear->Set(value: reg, zone);
631	}
632	// Perform the chronologically last action (or accumulated increment)
633	// for the register.
634	if (store_position != kNoStore) {
635	assembler->WriteCurrentPositionToRegister(reg, cp_offset: store_position);
636	} else if (clear) {
637	assembler->ClearRegisters(reg_from: reg, reg_to: reg);
638	} else if (absolute) {
639	assembler->SetRegister(register_index: reg, to: value);
640	} else if (value != `0`) {
641	assembler->AdvanceRegister(reg, by: value);
642	}
643	}
644	}
645
646	// This is called as we come into a loop choice node and some other tricky
647	// nodes. It normalizes the state of the code generator to ensure we can
648	// generate generic code.
649	void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
650	RegExpMacroAssembler* assembler = compiler->macro_assembler();
651
652	ASSERT(!is_trivial());
653
654	if (actions_ == nullptr && backtrack() == nullptr) {
655	// Here we just have some deferred cp advances to fix and we are back to
656	// a normal situation. We may also have to forget some information gained
657	// through a quick check that was already performed.
658	if (cp_offset_ != `0`) assembler->AdvanceCurrentPosition(by: cp_offset_);
659	// Create a new trivial state and generate the node with that.
660	Trace new_state;
661	successor->Emit(compiler, trace: &new_state);
662	return;
663	}
664
665	// Generate deferred actions here along with code to undo them again.
666	OutSet affected_registers;
667
668	if (backtrack() != nullptr) {
669	// Here we have a concrete backtrack location. These are set up by choice
670	// nodes and so they indicate that we have a deferred save of the current
671	// position which we may need to emit here.
672	assembler->PushCurrentPosition();
673	}
674	Zone* zone = successor->zone();
675	intptr_t max_register = FindAffectedRegisters(affected_registers: &affected_registers, zone);
676	OutSet registers_to_pop;
677	OutSet registers_to_clear;
678	PerformDeferredActions(assembler, max_register, affected_registers,
679	registers_to_pop: &registers_to_pop, registers_to_clear: &registers_to_clear, zone);
680	if (cp_offset_ != `0`) {
681	assembler->AdvanceCurrentPosition(by: cp_offset_);
682	}
683
684	// Create a new trivial state and generate the node with that.
685	BlockLabel undo;
686	assembler->PushBacktrack(label: &undo);
687	Trace new_state;
688	successor->Emit(compiler, trace: &new_state);
689
690	// On backtrack we need to restore state.
691	assembler->BindBlock(label: &undo);
692	RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
693	registers_to_clear);
694	if (backtrack() == nullptr) {
695	assembler->Backtrack();
696	} else {
697	assembler->PopCurrentPosition();
698	assembler->GoTo(to: backtrack());
699	}
700	}
701
702	void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
703	RegExpMacroAssembler* assembler = compiler->macro_assembler();
704
705	// Omit flushing the trace. We discard the entire stack frame anyway.
706
707	if (!label()->is_bound()) {
708	// We are completely independent of the trace, since we ignore it,
709	// so this code can be used as the generic version.
710	assembler->BindBlock(label: label());
711	}
712
713	// Throw away everything on the backtrack stack since the start
714	// of the negative submatch and restore the character position.
715	assembler->ReadCurrentPositionFromRegister(reg: current_position_register_);
716	assembler->ReadStackPointerFromRegister(reg: stack_pointer_register_);
717	if (clear_capture_count_ > `0`) {
718	// Clear any captures that might have been performed during the success
719	// of the body of the negative look-ahead.
720	int clear_capture_end = clear_capture_start_ + clear_capture_count_ - `1`;
721	assembler->ClearRegisters(reg_from: clear_capture_start_, reg_to: clear_capture_end);
722	}
723	// Now that we have unwound the stack we find at the top of the stack the
724	// backtrack that the BeginSubmatch node got.
725	assembler->Backtrack();
726	}
727
728	void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
729	if (!trace->is_trivial()) {
730	trace->Flush(compiler, successor: this);
731	return;
732	}
733	RegExpMacroAssembler* assembler = compiler->macro_assembler();
734	if (!label()->is_bound()) {
735	assembler->BindBlock(label: label());
736	}
737	switch (action_) {
738	case ACCEPT:
739	assembler->Succeed();
740	return;
741	case BACKTRACK:
742	assembler->GoTo(to: trace->backtrack());
743	return;
744	case NEGATIVE_SUBMATCH_SUCCESS:
745	// This case is handled in a different virtual method.
746	UNREACHABLE();
747	}
748	UNIMPLEMENTED();
749	}
750
751	void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
752	if (guards_ == nullptr) guards_ = new (zone) ZoneGrowableArray<Guard*>(`1`);
753	guards_->Add(value: guard);
754	}
755
756	ActionNode* ActionNode::SetRegister(intptr_t reg,
757	intptr_t val,
758	RegExpNode* on_success) {
759	ActionNode* result =
760	new (on_success->zone()) ActionNode (SET_REGISTER, on_success);
761	result->data_.u_store_register.reg = reg;
762	result->data_.u_store_register.value = val;
763	return result;
764	}
765
766	ActionNode* ActionNode::IncrementRegister(intptr_t reg,
767	RegExpNode* on_success) {
768	ActionNode* result =
769	new (on_success->zone()) ActionNode (INCREMENT_REGISTER, on_success);
770	result->data_.u_increment_register.reg = reg;
771	return result;
772	}
773
774	ActionNode* ActionNode::StorePosition(intptr_t reg,
775	bool is_capture,
776	RegExpNode* on_success) {
777	ActionNode* result =
778	new (on_success->zone()) ActionNode (STORE_POSITION, on_success);
779	result->data_.u_position_register.reg = reg;
780	result->data_.u_position_register.is_capture = is_capture;
781	return result;
782	}
783
784	ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
785	ActionNode* result =
786	new (on_success->zone()) ActionNode (CLEAR_CAPTURES, on_success);
787	result->data_.u_clear_captures.range_from = range.from();
788	result->data_.u_clear_captures.range_to = range.to();
789	return result;
790	}
791
792	ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg,
793	intptr_t position_reg,
794	RegExpNode* on_success) {
795	ActionNode* result =
796	new (on_success->zone()) ActionNode (BEGIN_SUBMATCH, on_success);
797	result->data_.u_submatch.stack_pointer_register = stack_reg;
798	result->data_.u_submatch.current_position_register = position_reg;
799	return result;
800	}
801
802	ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg,
803	intptr_t position_reg,
804	intptr_t clear_register_count,
805	intptr_t clear_register_from,
806	RegExpNode* on_success) {
807	ActionNode* result = new (on_success->zone())
808	ActionNode (POSITIVE_SUBMATCH_SUCCESS, on_success);
809	result->data_.u_submatch.stack_pointer_register = stack_reg;
810	result->data_.u_submatch.current_position_register = position_reg;
811	result->data_.u_submatch.clear_register_count = clear_register_count;
812	result->data_.u_submatch.clear_register_from = clear_register_from;
813	return result;
814	}
815
816	ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register,
817	intptr_t repetition_register,
818	intptr_t repetition_limit,
819	RegExpNode* on_success) {
820	ActionNode* result =
821	new (on_success->zone()) ActionNode (EMPTY_MATCH_CHECK, on_success);
822	result->data_.u_empty_match_check.start_register = start_register;
823	result->data_.u_empty_match_check.repetition_register = repetition_register;
824	result->data_.u_empty_match_check.repetition_limit = repetition_limit;
825	return result;
826	}
827
828	#define DEFINE_ACCEPT(Type) \
829	void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
830	FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
831	#undef DEFINE_ACCEPT
832
833	void LoopChoiceNode::Accept(NodeVisitor* visitor) {
834	visitor->VisitLoopChoice(that: this);
835	}
836
837	// -------------------------------------------------------------------
838	// Emit code.
839
840	void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
841	Guard* guard,
842	Trace* trace) {
843	switch (guard->op()) {
844	case Guard::LT:
845	ASSERT(!trace->mentions_reg(guard->reg()));
846	macro_assembler->IfRegisterGE(reg: guard->reg(), comparand: guard->value(),
847	if_ge: trace->backtrack());
848	break;
849	case Guard::GEQ:
850	ASSERT(!trace->mentions_reg(guard->reg()));
851	macro_assembler->IfRegisterLT(reg: guard->reg(), comparand: guard->value(),
852	if_lt: trace->backtrack());
853	break;
854	}
855	}
856
857	// Returns the number of characters in the equivalence class, omitting those
858	// that cannot occur in the source string because it is ASCII.
859	static intptr_t GetCaseIndependentLetters(uint16_t character,
860	bool one_byte_subject,
861	int32_t* letters) {
862	unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
863	intptr_t length = jsregexp_uncanonicalize.get(c: character, n: `'\0'`, result: letters);
864	// Unibrow returns 0 or 1 for characters where case independence is
865	// trivial.
866	if (length == `0`) {
867	letters[`0`] = character;
868	length = `1`;
869	}
870	if (!one_byte_subject \|\| character <= Symbols::kMaxOneCharCodeSymbol) {
871	return length;
872	}
873
874	// The standard requires that non-ASCII characters cannot have ASCII
875	// character codes in their equivalence class.
876	// TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
877	// is it? For example, \u00C5 is equivalent to \u212B.
878	return `0`;
879	}
880
881	static inline bool EmitSimpleCharacter(Zone* zone,
882	RegExpCompiler* compiler,
883	uint16_t c,
884	BlockLabel* on_failure,
885	intptr_t cp_offset,
886	bool check,
887	bool preloaded) {
888	RegExpMacroAssembler* assembler = compiler->macro_assembler();
889	bool bound_checked = false;
890	if (!preloaded) {
891	assembler->LoadCurrentCharacter(cp_offset, on_end_of_input: on_failure, check_bounds: check);
892	bound_checked = true;
893	}
894	assembler->CheckNotCharacter(c, on_not_equal: on_failure);
895	return bound_checked;
896	}
897
898	// Only emits non-letters (things that don't have case). Only used for case
899	// independent matches.
900	static inline bool EmitAtomNonLetter(Zone* zone,
901	RegExpCompiler* compiler,
902	uint16_t c,
903	BlockLabel* on_failure,
904	intptr_t cp_offset,
905	bool check,
906	bool preloaded) {
907	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
908	bool one_byte = compiler->one_byte();
909	int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
910	intptr_t length = GetCaseIndependentLetters(character: c, one_byte_subject: one_byte, letters: chars);
911	if (length < `1`) {
912	// This can't match. Must be an one-byte subject and a non-one-byte
913	// character. We do not need to do anything since the one-byte pass
914	// already handled this.
915	return false; // Bounds not checked.
916	}
917	bool checked = false;
918	// We handle the length > 1 case in a later pass.
919	if (length == `1`) {
920	if (one_byte && c > Symbols::kMaxOneCharCodeSymbol) {
921	// Can't match - see above.
922	return false; // Bounds not checked.
923	}
924	if (!preloaded) {
925	macro_assembler->LoadCurrentCharacter(cp_offset, on_end_of_input: on_failure, check_bounds: check);
926	checked = check;
927	}
928	macro_assembler->CheckNotCharacter(c, on_not_equal: on_failure);
929	}
930	return checked;
931	}
932
933	static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
934	bool one_byte,
935	uint16_t c1,
936	uint16_t c2,
937	BlockLabel* on_failure) {
938	uint16_t char_mask;
939	if (one_byte) {
940	char_mask = Symbols::kMaxOneCharCodeSymbol;
941	} else {
942	char_mask = Utf16::kMaxCodeUnit;
943	}
944	uint16_t exor = c1 ^ c2;
945	// Check whether exor has only one bit set.
946	if (((exor - `1`) & exor) == `0`) {
947	// If c1 and c2 differ only by one bit.
948	// Ecma262UnCanonicalize always gives the highest number last.
949	ASSERT(c2 > c1);
950	uint16_t mask = char_mask ^ exor;
951	macro_assembler->CheckNotCharacterAfterAnd(c: c1, and_with: mask, on_not_equal: on_failure);
952	return true;
953	}
954	ASSERT(c2 > c1);
955	uint16_t diff = c2 - c1;
956	if (((diff - `1`) & diff) == `0` && c1 >= diff) {
957	// If the characters differ by 2^n but don't differ by one bit then
958	// subtract the difference from the found character, then do the or
959	// trick. We avoid the theoretical case where negative numbers are
960	// involved in order to simplify code generation.
961	uint16_t mask = char_mask ^ diff;
962	macro_assembler->CheckNotCharacterAfterMinusAnd(c: c1 - diff, minus: diff, and_with: mask,
963	on_not_equal: on_failure);
964	return true;
965	}
966	return false;
967	}
968
969	typedef bool EmitCharacterFunction(Zone* zone,
970	RegExpCompiler* compiler,
971	uint16_t c,
972	BlockLabel* on_failure,
973	intptr_t cp_offset,
974	bool check,
975	bool preloaded);
976
977	// Only emits letters (things that have case). Only used for case independent
978	// matches.
979	static inline bool EmitAtomLetter(Zone* zone,
980	RegExpCompiler* compiler,
981	uint16_t c,
982	BlockLabel* on_failure,
983	intptr_t cp_offset,
984	bool check,
985	bool preloaded) {
986	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
987	bool one_byte = compiler->one_byte();
988	int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
989	intptr_t length = GetCaseIndependentLetters(character: c, one_byte_subject: one_byte, letters: chars);
990	if (length <= `1`) return false;
991	// We may not need to check against the end of the input string
992	// if this character lies before a character that matched.
993	if (!preloaded) {
994	macro_assembler->LoadCurrentCharacter(cp_offset, on_end_of_input: on_failure, check_bounds: check);
995	}
996	BlockLabel ok;
997	ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == `4`);
998	switch (length) {
999	case `2`: {
1000	if (ShortCutEmitCharacterPair(macro_assembler, one_byte, c1: chars[`0`],
1001	c2: chars[`1`], on_failure)) {
1002	} else {
1003	macro_assembler->CheckCharacter(c: chars[`0`], on_equal: &ok);
1004	macro_assembler->CheckNotCharacter(c: chars[`1`], on_not_equal: on_failure);
1005	macro_assembler->BindBlock(label: &ok);
1006	}
1007	break;
1008	}
1009	case `4`:
1010	macro_assembler->CheckCharacter(c: chars[`3`], on_equal: &ok);
1011	FALL_THROUGH;
1012	case `3`:
1013	macro_assembler->CheckCharacter(c: chars[`0`], on_equal: &ok);
1014	macro_assembler->CheckCharacter(c: chars[`1`], on_equal: &ok);
1015	macro_assembler->CheckNotCharacter(c: chars[`2`], on_not_equal: on_failure);
1016	macro_assembler->BindBlock(label: &ok);
1017	break;
1018	default:
1019	UNREACHABLE();
1020	break;
1021	}
1022	return true;
1023	}
1024
1025	static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1026	uint16_t border,
1027	BlockLabel* fall_through,
1028	BlockLabel* above_or_equal,
1029	BlockLabel* below) {
1030	if (below != fall_through) {
1031	masm->CheckCharacterLT(limit: border, on_less: below);
1032	if (above_or_equal != fall_through) masm->GoTo(to: above_or_equal);
1033	} else {
1034	masm->CheckCharacterGT(limit: border - `1`, on_greater: above_or_equal);
1035	}
1036	}
1037
1038	static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1039	uint16_t first,
1040	uint16_t last,
1041	BlockLabel* fall_through,
1042	BlockLabel* in_range,
1043	BlockLabel* out_of_range) {
1044	if (in_range == fall_through) {
1045	if (first == last) {
1046	masm->CheckNotCharacter(c: first, on_not_equal: out_of_range);
1047	} else {
1048	masm->CheckCharacterNotInRange(from: first, to: last, on_not_in_range: out_of_range);
1049	}
1050	} else {
1051	if (first == last) {
1052	masm->CheckCharacter(c: first, on_equal: in_range);
1053	} else {
1054	masm->CheckCharacterInRange(from: first, to: last, on_in_range: in_range);
1055	}
1056	if (out_of_range != fall_through) masm->GoTo(to: out_of_range);
1057	}
1058	}
1059
1060	// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
1061	// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
1062	static void EmitUseLookupTable(RegExpMacroAssembler* masm,
1063	ZoneGrowableArray<uint16_t>* ranges,
1064	intptr_t start_index,
1065	intptr_t end_index,
1066	uint16_t min_char,
1067	BlockLabel* fall_through,
1068	BlockLabel* even_label,
1069	BlockLabel* odd_label) {
1070	const intptr_t kSize = RegExpMacroAssembler::kTableSize;
1071	const intptr_t kMask = RegExpMacroAssembler::kTableMask;
1072
1073	intptr_t base = (min_char & ~kMask);
1074
1075	// Assert that everything is on one kTableSize page.
1076	for (intptr_t i = start_index; i <= end_index; i++) {
1077	ASSERT((ranges->At(i) & ~kMask) == base);
1078	}
1079	ASSERT(start_index == `0` \|\| (ranges->At(start_index - `1`) & ~kMask) <= base);
1080
1081	char templ[kSize];
1082	BlockLabel* on_bit_set;
1083	BlockLabel* on_bit_clear;
1084	intptr_t bit;
1085	if (even_label == fall_through) {
1086	on_bit_set = odd_label;
1087	on_bit_clear = even_label;
1088	bit = `1`;
1089	} else {
1090	on_bit_set = even_label;
1091	on_bit_clear = odd_label;
1092	bit = `0`;
1093	}
1094	for (intptr_t i = `0`; i < (ranges->At(index: start_index) & kMask) && i < kSize;
1095	i++) {
1096	templ[i] = bit;
1097	}
1098	intptr_t j = `0`;
1099	bit ^= `1`;
1100	for (intptr_t i = start_index; i < end_index; i++) {
1101	for (j = (ranges->At(index: i) & kMask); j < (ranges->At(index: i + `1`) & kMask); j++) {
1102	templ[j] = bit;
1103	}
1104	bit ^= `1`;
1105	}
1106	for (intptr_t i = j; i < kSize; i++) {
1107	templ[i] = bit;
1108	}
1109	// TODO(erikcorry): Cache these.
1110	const TypedData& ba = TypedData::ZoneHandle(
1111	zone: masm->zone(), ptr: TypedData::New(class_id: kTypedDataUint8ArrayCid, len: kSize, space: Heap::kOld));
1112	for (intptr_t i = `0`; i < kSize; i++) {
1113	ba.SetUint8(byte_offset: i, value: templ[i]);
1114	}
1115	masm->CheckBitInTable(table: ba, on_bit_set);
1116	if (on_bit_clear != fall_through) masm->GoTo(to: on_bit_clear);
1117	}
1118
1119	static void CutOutRange(RegExpMacroAssembler* masm,
1120	ZoneGrowableArray<uint16_t>* ranges,
1121	intptr_t start_index,
1122	intptr_t end_index,
1123	intptr_t cut_index,
1124	BlockLabel* even_label,
1125	BlockLabel* odd_label) {
1126	bool odd = (((cut_index - start_index) & `1`) == `1`);
1127	BlockLabel* in_range_label = odd ? odd_label : even_label;
1128	BlockLabel dummy;
1129	EmitDoubleBoundaryTest(masm, first: ranges->At(index: cut_index),
1130	last: ranges->At(index: cut_index + `1`) - `1`, fall_through: &dummy, in_range: in_range_label,
1131	out_of_range: &dummy);
1132	ASSERT(!dummy.is_linked());
1133	// Cut out the single range by rewriting the array. This creates a new
1134	// range that is a merger of the two ranges on either side of the one we
1135	// are cutting out. The oddity of the labels is preserved.
1136	for (intptr_t j = cut_index; j > start_index; j--) {
1137	(*ranges)[j] = ranges->At(index: j - `1`);
1138	}
1139	for (intptr_t j = cut_index + `1`; j < end_index; j++) {
1140	(*ranges)[j] = ranges->At(index: j + `1`);
1141	}
1142	}
1143
1144	// Unicode case. Split the search space into kSize spaces that are handled
1145	// with recursion.
1146	static void SplitSearchSpace(ZoneGrowableArray<uint16_t>* ranges,
1147	intptr_t start_index,
1148	intptr_t end_index,
1149	intptr_t* new_start_index,
1150	intptr_t* new_end_index,
1151	uint16_t* border) {
1152	const intptr_t kSize = RegExpMacroAssembler::kTableSize;
1153	const intptr_t kMask = RegExpMacroAssembler::kTableMask;
1154
1155	uint16_t first = ranges->At(index: start_index);
1156	uint16_t last = ranges->At(index: end_index) - `1`;
1157
1158	*new_start_index = start_index;
1159	*border = (ranges->At(index: start_index) & ~kMask) + kSize;
1160	while (*new_start_index < end_index) {
1161	if (ranges->At(index: new_start_index) > border) break;
1162	(*new_start_index)++;
1163	}
1164	// new_start_index is the index of the first edge that is beyond the
1165	// current kSize space.
1166
1167	// For very large search spaces we do a binary chop search of the non-Latin1
1168	// space instead of just going to the end of the current kSize space. The
1169	// heuristics are complicated a little by the fact that any 128-character
1170	// encoding space can be quickly tested with a table lookup, so we don't
1171	// wish to do binary chop search at a smaller granularity than that. A
1172	// 128-character space can take up a lot of space in the ranges array if,
1173	// for example, we only want to match every second character (eg. the lower
1174	// case characters on some Unicode pages).
1175	intptr_t binary_chop_index = (end_index + start_index) / `2`;
1176	// The first test ensures that we get to the code that handles the Latin1
1177	// range with a single not-taken branch, speeding up this important
1178	// character range (even non-Latin1 charset-based text has spaces and
1179	// punctuation).
1180	if (border - `1` > Symbols::kMaxOneCharCodeSymbol && // Latin1 case.*
1181	end_index - start_index > (new_start_index - start_index) `2` &&
1182	last - first > kSize * `2` && binary_chop_index > *new_start_index &&
1183	ranges->At(index: binary_chop_index) >= first + `2` * kSize) {
1184	intptr_t scan_forward_for_section_border = binary_chop_index;
1185	intptr_t new_border = (ranges->At(index: binary_chop_index) \| kMask) + `1`;
1186
1187	while (scan_forward_for_section_border < end_index) {
1188	if (ranges->At(index: scan_forward_for_section_border) > new_border) {
1189	*new_start_index = scan_forward_for_section_border;
1190	*border = new_border;
1191	break;
1192	}
1193	scan_forward_for_section_border++;
1194	}
1195	}
1196
1197	ASSERT(*new_start_index > start_index);
1198	new_end_index = new_start_index - `1`;
1199	if (ranges->At(index: new_end_index) == border) {
1200	(*new_end_index)--;
1201	}
1202	if (*border >= ranges->At(index: end_index)) {
1203	*border = ranges->At(index: end_index);
1204	new_start_index = end_index; // Won't be used.*
1205	*new_end_index = end_index - `1`;
1206	}
1207	}
1208
1209	// Gets a series of segment boundaries representing a character class. If the
1210	// character is in the range between an even and an odd boundary (counting from
1211	// start_index) then go to even_label, otherwise go to odd_label. We already
1212	// know that the character is in the range of min_char to max_char inclusive.
1213	// Either label can be null indicating backtracking. Either label can also be
1214	// equal to the fall_through label.
1215	static void GenerateBranches(RegExpMacroAssembler* masm,
1216	ZoneGrowableArray<uint16_t>* ranges,
1217	intptr_t start_index,
1218	intptr_t end_index,
1219	uint16_t min_char,
1220	uint16_t max_char,
1221	BlockLabel* fall_through,
1222	BlockLabel* even_label,
1223	BlockLabel* odd_label) {
1224	uint16_t first = ranges->At(index: start_index);
1225	uint16_t last = ranges->At(index: end_index) - `1`;
1226
1227	ASSERT(min_char < first);
1228
1229	// Just need to test if the character is before or on-or-after
1230	// a particular character.
1231	if (start_index == end_index) {
1232	EmitBoundaryTest(masm, border: first, fall_through, above_or_equal: even_label, below: odd_label);
1233	return;
1234	}
1235
1236	// Another almost trivial case: There is one interval in the middle that is
1237	// different from the end intervals.
1238	if (start_index + `1` == end_index) {
1239	EmitDoubleBoundaryTest(masm, first, last, fall_through, in_range: even_label,
1240	out_of_range: odd_label);
1241	return;
1242	}
1243
1244	// It's not worth using table lookup if there are very few intervals in the
1245	// character class.
1246	if (end_index - start_index <= `6`) {
1247	// It is faster to test for individual characters, so we look for those
1248	// first, then try arbitrary ranges in the second round.
1249	static intptr_t kNoCutIndex = -`1`;
1250	intptr_t cut = kNoCutIndex;
1251	for (intptr_t i = start_index; i < end_index; i++) {
1252	if (ranges->At(index: i) == ranges->At(index: i + `1`) - `1`) {
1253	cut = i;
1254	break;
1255	}
1256	}
1257	if (cut == kNoCutIndex) cut = start_index;
1258	CutOutRange(masm, ranges, start_index, end_index, cut_index: cut, even_label,
1259	odd_label);
1260	ASSERT(end_index - start_index >= `2`);
1261	GenerateBranches(masm, ranges, start_index: start_index + `1`, end_index: end_index - `1`, min_char,
1262	max_char, fall_through, even_label, odd_label);
1263	return;
1264	}
1265
1266	// If there are a lot of intervals in the regexp, then we will use tables to
1267	// determine whether the character is inside or outside the character class.
1268	const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits;
1269
1270	if ((max_char >> kBits) == (min_char >> kBits)) {
1271	EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1272	fall_through, even_label, odd_label);
1273	return;
1274	}
1275
1276	if ((min_char >> kBits) != (first >> kBits)) {
1277	masm->CheckCharacterLT(limit: first, on_less: odd_label);
1278	GenerateBranches(masm, ranges, start_index: start_index + `1`, end_index, min_char: first, max_char,
1279	fall_through, even_label: odd_label, odd_label: even_label);
1280	return;
1281	}
1282
1283	intptr_t new_start_index = `0`;
1284	intptr_t new_end_index = `0`;
1285	uint16_t border = `0`;
1286
1287	SplitSearchSpace(ranges, start_index, end_index, new_start_index: &new_start_index,
1288	new_end_index: &new_end_index, border: &border);
1289
1290	BlockLabel handle_rest;
1291	BlockLabel* above = &handle_rest;
1292	if (border == last + `1`) {
1293	// We didn't find any section that started after the limit, so everything
1294	// above the border is one of the terminal labels.
1295	above = (end_index & `1`) != (start_index & `1`) ? odd_label : even_label;
1296	ASSERT(new_end_index == end_index - `1`);
1297	}
1298
1299	ASSERT(start_index <= new_end_index);
1300	ASSERT(new_start_index <= end_index);
1301	ASSERT(start_index < new_start_index);
1302	ASSERT(new_end_index < end_index);
1303	ASSERT(new_end_index + `1` == new_start_index \|\|
1304	(new_end_index + `2` == new_start_index &&
1305	border == ranges->At(new_end_index + `1`)));
1306	ASSERT(min_char < border - `1`);
1307	ASSERT(border < max_char);
1308	ASSERT(ranges->At(new_end_index) < border);
1309	ASSERT(border < ranges->At(new_start_index) \|\|
1310	(border == ranges->At(new_start_index) &&
1311	new_start_index == end_index && new_end_index == end_index - `1` &&
1312	border == last + `1`));
1313	ASSERT(new_start_index == `0` \|\| border >= ranges->At(new_start_index - `1`));
1314
1315	masm->CheckCharacterGT(limit: border - `1`, on_greater: above);
1316	BlockLabel dummy;
1317	GenerateBranches(masm, ranges, start_index, end_index: new_end_index, min_char,
1318	max_char: border - `1`, fall_through: &dummy, even_label, odd_label);
1319
1320	if (handle_rest.is_linked()) {
1321	masm->BindBlock(label: &handle_rest);
1322	bool flip = (new_start_index & `1`) != (start_index & `1`);
1323	GenerateBranches(masm, ranges, start_index: new_start_index, end_index, min_char: border, max_char,
1324	fall_through: &dummy, even_label: flip ? odd_label : even_label,
1325	odd_label: flip ? even_label : odd_label);
1326	}
1327	}
1328
1329	static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1330	RegExpCharacterClass* cc,
1331	bool one_byte,
1332	BlockLabel* on_failure,
1333	intptr_t cp_offset,
1334	bool check_offset,
1335	bool preloaded,
1336	Zone* zone) {
1337	ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
1338	if (!CharacterRange::IsCanonical(ranges)) {
1339	CharacterRange::Canonicalize(ranges);
1340	}
1341
1342	uint16_t max_char;
1343	if (one_byte) {
1344	max_char = Symbols::kMaxOneCharCodeSymbol;
1345	} else {
1346	max_char = Utf16::kMaxCodeUnit;
1347	}
1348
1349	intptr_t range_count = ranges->length();
1350
1351	intptr_t last_valid_range = range_count - `1`;
1352	while (last_valid_range >= `0`) {
1353	const CharacterRange& range = ranges->At(index: last_valid_range);
1354	if (range.from() <= max_char) {
1355	break;
1356	}
1357	last_valid_range--;
1358	}
1359
1360	if (last_valid_range < `0`) {
1361	if (!cc->is_negated()) {
1362	macro_assembler->GoTo(to: on_failure);
1363	}
1364	if (check_offset) {
1365	macro_assembler->CheckPosition(cp_offset, on_outside_input: on_failure);
1366	}
1367	return;
1368	}
1369
1370	if (last_valid_range == `0` && ranges->At(index: `0`).IsEverything(max: max_char)) {
1371	if (cc->is_negated()) {
1372	macro_assembler->GoTo(to: on_failure);
1373	} else {
1374	// This is a common case hit by non-anchored expressions.
1375	if (check_offset) {
1376	macro_assembler->CheckPosition(cp_offset, on_outside_input: on_failure);
1377	}
1378	}
1379	return;
1380	}
1381
1382	if (!preloaded) {
1383	macro_assembler->LoadCurrentCharacter(cp_offset, on_end_of_input: on_failure, check_bounds: check_offset);
1384	}
1385
1386	if (cc->is_standard() && macro_assembler->CheckSpecialCharacterClass(
1387	type: cc->standard_type(), on_no_match: on_failure)) {
1388	return;
1389	}
1390
1391	// A new list with ascending entries. Each entry is a code unit
1392	// where there is a boundary between code units that are part of
1393	// the class and code units that are not. Normally we insert an
1394	// entry at zero which goes to the failure label, but if there
1395	// was already one there we fall through for success on that entry.
1396	// Subsequent entries have alternating meaning (success/failure).
1397	ZoneGrowableArray<uint16_t>* range_boundaries =
1398	new (zone) ZoneGrowableArray<uint16_t>(last_valid_range);
1399
1400	bool zeroth_entry_is_failure = !cc->is_negated();
1401
1402	for (intptr_t i = `0`; i <= last_valid_range; i++) {
1403	const CharacterRange& range = ranges->At(index: i);
1404	if (range.from() == `0`) {
1405	ASSERT(i == `0`);
1406	zeroth_entry_is_failure = !zeroth_entry_is_failure;
1407	} else {
1408	range_boundaries->Add(value: range.from());
1409	}
1410	if (range.to() + `1` <= max_char) {
1411	range_boundaries->Add(value: range.to() + `1`);
1412	}
1413	}
1414	intptr_t end_index = range_boundaries->length() - `1`;
1415
1416	BlockLabel fall_through;
1417	GenerateBranches(masm: macro_assembler, ranges: range_boundaries,
1418	start_index: `0`, // start_index.
1419	end_index,
1420	min_char: `0`, // min_char.
1421	max_char, fall_through: &fall_through,
1422	even_label: zeroth_entry_is_failure ? &fall_through : on_failure,
1423	odd_label: zeroth_entry_is_failure ? on_failure : &fall_through);
1424	macro_assembler->BindBlock(label: &fall_through);
1425	}
1426
1427	RegExpNode::~RegExpNode() {}
1428
1429	RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1430	Trace* trace) {
1431	// If we are generating a greedy loop then don't stop and don't reuse code.
1432	if (trace->stop_node() != nullptr) {
1433	return CONTINUE;
1434	}
1435
1436	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1437	if (trace->is_trivial()) {
1438	if (label_.is_bound()) {
1439	// We are being asked to generate a generic version, but that's already
1440	// been done so just go to it.
1441	macro_assembler->GoTo(to: &label_);
1442	return DONE;
1443	}
1444	if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
1445	// To avoid too deep recursion we push the node to the work queue and just
1446	// generate a goto here.
1447	compiler->AddWork(node: this);
1448	macro_assembler->GoTo(to: &label_);
1449	return DONE;
1450	}
1451	// Generate generic version of the node and bind the label for later use.
1452	macro_assembler->BindBlock(label: &label_);
1453	return CONTINUE;
1454	}
1455
1456	// We are being asked to make a non-generic version. Keep track of how many
1457	// non-generic versions we generate so as not to overdo it.
1458	trace_count_++;
1459	if (kRegexpOptimization && trace_count_ < kMaxCopiesCodeGenerated &&
1460	compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
1461	return CONTINUE;
1462	}
1463
1464	// If we get here code has been generated for this node too many times or
1465	// recursion is too deep. Time to switch to a generic version. The code for
1466	// generic versions above can handle deep recursion properly.
1467	trace->Flush(compiler, successor: this);
1468	return DONE;
1469	}
1470
1471	intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find,
1472	intptr_t budget,
1473	bool not_at_start) {
1474	if (budget <= `0`) return `0`;
1475	if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return `0`; // Rewinds input!
1476	return on_success()->EatsAtLeast(still_to_find, budget: budget - `1`, not_at_start);
1477	}
1478
1479	void ActionNode::FillInBMInfo(intptr_t offset,
1480	intptr_t budget,
1481	BoyerMooreLookahead* bm,
1482	bool not_at_start) {
1483	if (action_type_ == BEGIN_SUBMATCH) {
1484	bm->SetRest(offset);
1485	} else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
1486	on_success()->FillInBMInfo(offset, budget: budget - `1`, bm, not_at_start);
1487	}
1488	SaveBMInfo(bm, not_at_start, offset);
1489	}
1490
1491	intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find,
1492	intptr_t budget,
1493	bool not_at_start) {
1494	if (budget <= `0`) return `0`;
1495	// If we know we are not at the start and we are asked "how many characters
1496	// will you match if you succeed?" then we can answer anything since false
1497	// implies false. So lets just return the max answer (still_to_find) since
1498	// that won't prevent us from preloading a lot of characters for the other
1499	// branches in the node graph.
1500	if (assertion_type() == AT_START && not_at_start) return still_to_find;
1501	return on_success()->EatsAtLeast(still_to_find, budget: budget - `1`, not_at_start);
1502	}
1503
1504	void AssertionNode::FillInBMInfo(intptr_t offset,
1505	intptr_t budget,
1506	BoyerMooreLookahead* bm,
1507	bool not_at_start) {
1508	// Match the behaviour of EatsAtLeast on this node.
1509	if (assertion_type() == AT_START && not_at_start) return;
1510	on_success()->FillInBMInfo(offset, budget: budget - `1`, bm, not_at_start);
1511	SaveBMInfo(bm, not_at_start, offset);
1512	}
1513
1514	intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find,
1515	intptr_t budget,
1516	bool not_at_start) {
1517	if (read_backward()) return `0`;
1518	if (budget <= `0`) return `0`;
1519	return on_success()->EatsAtLeast(still_to_find, budget: budget - `1`, not_at_start);
1520	}
1521
1522	intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,
1523	intptr_t budget,
1524	bool not_at_start) {
1525	if (read_backward()) return `0`;
1526	intptr_t answer = Length();
1527	if (answer >= still_to_find) return answer;
1528	if (budget <= `0`) return answer;
1529	// We are not at start after this node so we set the last argument to 'true'.
1530	return answer +
1531	on_success()->EatsAtLeast(still_to_find: still_to_find - answer, budget: budget - `1`, not_at_start: true);
1532	}
1533
1534	intptr_t NegativeLookaroundChoiceNode::EatsAtLeast(intptr_t still_to_find,
1535	intptr_t budget,
1536	bool not_at_start) {
1537	if (budget <= `0`) return `0`;
1538	// Alternative 0 is the negative lookahead, alternative 1 is what comes
1539	// afterwards.
1540	RegExpNode* node = (*alternatives_)[`1`].node();
1541	return node->EatsAtLeast(still_to_find, budget: budget - `1`, not_at_start);
1542	}
1543
1544	void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
1545	QuickCheckDetails* details,
1546	RegExpCompiler* compiler,
1547	intptr_t filled_in,
1548	bool not_at_start) {
1549	// Alternative 0 is the negative lookahead, alternative 1 is what comes
1550	// afterwards.
1551	RegExpNode* node = (*alternatives_)[`1`].node();
1552	return node->GetQuickCheckDetails(details, compiler, characters_filled_in: filled_in, not_at_start);
1553	}
1554
1555	intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find,
1556	intptr_t budget,
1557	RegExpNode* ignore_this_node,
1558	bool not_at_start) {
1559	if (budget <= `0`) return `0`;
1560	intptr_t min = `100`;
1561	intptr_t choice_count = alternatives_->length();
1562	budget = (budget - `1`) / choice_count;
1563	for (intptr_t i = `0`; i < choice_count; i++) {
1564	RegExpNode* node = (*alternatives_)[i].node();
1565	if (node == ignore_this_node) continue;
1566	intptr_t node_eats_at_least =
1567	node->EatsAtLeast(still_to_find, budget, not_at_start);
1568	if (node_eats_at_least < min) min = node_eats_at_least;
1569	if (min == `0`) return `0`;
1570	}
1571	return min;
1572	}
1573
1574	intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,
1575	intptr_t budget,
1576	bool not_at_start) {
1577	return EatsAtLeastHelper(still_to_find, budget: budget - `1`, ignore_this_node: loop_node_, not_at_start);
1578	}
1579
1580	intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find,
1581	intptr_t budget,
1582	bool not_at_start) {
1583	return EatsAtLeastHelper(still_to_find, budget, ignore_this_node: nullptr, not_at_start);
1584	}
1585
1586	// Takes the left-most 1-bit and smears it out, setting all bits to its right.
1587	static inline uint32_t SmearBitsRight(uint32_t v) {
1588	v \|= v >> `1`;
1589	v \|= v >> `2`;
1590	v \|= v >> `4`;
1591	v \|= v >> `8`;
1592	v \|= v >> `16`;
1593	return v;
1594	}
1595
1596	bool QuickCheckDetails::Rationalize(bool asc) {
1597	bool found_useful_op = false;
1598	uint32_t char_mask;
1599	if (asc) {
1600	char_mask = Symbols::kMaxOneCharCodeSymbol;
1601	} else {
1602	char_mask = Utf16::kMaxCodeUnit;
1603	}
1604	mask_ = `0`;
1605	value_ = `0`;
1606	intptr_t char_shift = `0`;
1607	for (intptr_t i = `0`; i < characters_; i++) {
1608	Position* pos = &positions_[i];
1609	if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != `0`) {
1610	found_useful_op = true;
1611	}
1612	mask_ \|= (pos->mask & char_mask) << char_shift;
1613	value_ \|= (pos->value & char_mask) << char_shift;
1614	char_shift += asc ? `8` : `16`;
1615	}
1616	return found_useful_op;
1617	}
1618
1619	bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1620	Trace* bounds_check_trace,
1621	Trace* trace,
1622	bool preload_has_checked_bounds,
1623	BlockLabel* on_possible_success,
1624	QuickCheckDetails* details,
1625	bool fall_through_on_failure) {
1626	if (details->characters() == `0`) return false;
1627	GetQuickCheckDetails(details, compiler, characters_filled_in: `0`,
1628	not_at_start: trace->at_start() == Trace::FALSE_VALUE);
1629	if (details->cannot_match()) return false;
1630	if (!details->Rationalize(asc: compiler->one_byte())) return false;
1631	ASSERT(details->characters() == `1` \|\|
1632	compiler->macro_assembler()->CanReadUnaligned());
1633	uint32_t mask = details->mask();
1634	uint32_t value = details->value();
1635
1636	RegExpMacroAssembler* assembler = compiler->macro_assembler();
1637
1638	if (trace->characters_preloaded() != details->characters()) {
1639	ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset());
1640	// We are attempting to preload the minimum number of characters
1641	// any choice would eat, so if the bounds check fails, then none of the
1642	// choices can succeed, so we can just immediately backtrack, rather
1643	// than go to the next choice.
1644	assembler->LoadCurrentCharacter(
1645	cp_offset: trace->cp_offset(), on_end_of_input: bounds_check_trace->backtrack(),
1646	check_bounds: !preload_has_checked_bounds, characters: details->characters());
1647	}
1648
1649	bool need_mask = true;
1650
1651	if (details->characters() == `1`) {
1652	// If number of characters preloaded is 1 then we used a byte or 16 bit
1653	// load so the value is already masked down.
1654	uint32_t char_mask;
1655	if (compiler->one_byte()) {
1656	char_mask = Symbols::kMaxOneCharCodeSymbol;
1657	} else {
1658	char_mask = Utf16::kMaxCodeUnit;
1659	}
1660	if ((mask & char_mask) == char_mask) need_mask = false;
1661	mask &= char_mask;
1662	} else {
1663	// For 2-character preloads in one-byte mode or 1-character preloads in
1664	// two-byte mode we also use a 16 bit load with zero extend.
1665	if (details->characters() == `2` && compiler->one_byte()) {
1666	if ((mask & `0xffff`) == `0xffff`) need_mask = false;
1667	} else if (details->characters() == `1` && !compiler->one_byte()) {
1668	if ((mask & `0xffff`) == `0xffff`) need_mask = false;
1669	} else {
1670	if (mask == `0xffffffff`) need_mask = false;
1671	}
1672	}
1673
1674	if (fall_through_on_failure) {
1675	if (need_mask) {
1676	assembler->CheckCharacterAfterAnd(c: value, and_with: mask, on_equal: on_possible_success);
1677	} else {
1678	assembler->CheckCharacter(c: value, on_equal: on_possible_success);
1679	}
1680	} else {
1681	if (need_mask) {
1682	assembler->CheckNotCharacterAfterAnd(c: value, and_with: mask, on_not_equal: trace->backtrack());
1683	} else {
1684	assembler->CheckNotCharacter(c: value, on_not_equal: trace->backtrack());
1685	}
1686	}
1687	return true;
1688	}
1689
1690	// Here is the meat of GetQuickCheckDetails (see also the comment on the
1691	// super-class in the .h file).
1692	//
1693	// We iterate along the text object, building up for each character a
1694	// mask and value that can be used to test for a quick failure to match.
1695	// The masks and values for the positions will be combined into a single
1696	// machine word for the current character width in order to be used in
1697	// generating a quick check.
1698	void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1699	RegExpCompiler* compiler,
1700	intptr_t characters_filled_in,
1701	bool not_at_start) {
1702	#if defined(__GNUC__) && defined(__BYTE_ORDER__)
1703	// TODO(zerny): Make the combination code byte-order independent.
1704	ASSERT(details->characters() == `1` \|\|
1705	(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__));
1706	#endif
1707	// Do not collect any quick check details if the text node reads backward,
1708	// since it reads in the opposite direction than we use for quick checks.
1709	if (read_backward()) return;
1710	ASSERT(characters_filled_in < details->characters());
1711	intptr_t characters = details->characters();
1712	int32_t char_mask;
1713	if (compiler->one_byte()) {
1714	char_mask = Symbols::kMaxOneCharCodeSymbol;
1715	} else {
1716	char_mask = Utf16::kMaxCodeUnit;
1717	}
1718	for (intptr_t k = `0`; k < elms_->length(); k++) {
1719	TextElement elm = elms_->At(index: k);
1720	if (elm.text_type() == TextElement::ATOM) {
1721	ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
1722	for (intptr_t i = `0`; i < characters && i < quarks->length(); i++) {
1723	QuickCheckDetails::Position* pos =
1724	details->positions(index: characters_filled_in);
1725	uint16_t c = quarks->At(index: i);
1726	if (c > char_mask) {
1727	// If we expect a non-Latin1 character from an one-byte string,
1728	// there is no way we can match. Not even case independent
1729	// matching can turn an Latin1 character into non-Latin1 or
1730	// vice versa.
1731	// TODO(dcarney): issue 3550. Verify that this works as expected.
1732	// For example, \u0178 is uppercase of \u00ff (y-umlaut).
1733	details->set_cannot_match();
1734	pos->determines_perfectly = false;
1735	return;
1736	}
1737	if (elm.atom()->ignore_case()) {
1738	int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1739	intptr_t length =
1740	GetCaseIndependentLetters(character: c, one_byte_subject: compiler->one_byte(), letters: chars);
1741	ASSERT(length != `0`); // Can only happen if c > char_mask (see above).
1742	if (length == `1`) {
1743	// This letter has no case equivalents, so it's nice and simple
1744	// and the mask-compare will determine definitely whether we have
1745	// a match at this character position.
1746	pos->mask = char_mask;
1747	pos->value = c;
1748	pos->determines_perfectly = true;
1749	} else {
1750	uint32_t common_bits = char_mask;
1751	uint32_t bits = chars[`0`];
1752	for (intptr_t j = `1`; j < length; j++) {
1753	uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1754	common_bits ^= differing_bits;
1755	bits &= common_bits;
1756	}
1757	// If length is 2 and common bits has only one zero in it then
1758	// our mask and compare instruction will determine definitely
1759	// whether we have a match at this character position. Otherwise
1760	// it can only be an approximate check.
1761	uint32_t one_zero = (common_bits \| ~char_mask);
1762	if (length == `2` && ((~one_zero) & ((~one_zero) - `1`)) == `0`) {
1763	pos->determines_perfectly = true;
1764	}
1765	pos->mask = common_bits;
1766	pos->value = bits;
1767	}
1768	} else {
1769	// Don't ignore case. Nice simple case where the mask-compare will
1770	// determine definitely whether we have a match at this character
1771	// position.
1772	pos->mask = char_mask;
1773	pos->value = c;
1774	pos->determines_perfectly = true;
1775	}
1776	characters_filled_in++;
1777	ASSERT(characters_filled_in <= details->characters());
1778	if (characters_filled_in == details->characters()) {
1779	return;
1780	}
1781	}
1782	} else {
1783	QuickCheckDetails::Position* pos =
1784	details->positions(index: characters_filled_in);
1785	RegExpCharacterClass* tree = elm.char_class();
1786	ZoneGrowableArray<CharacterRange>* ranges = tree->ranges();
1787	ASSERT(!ranges->is_empty());
1788	if (!CharacterRange::IsCanonical(ranges)) {
1789	CharacterRange::Canonicalize(ranges);
1790	}
1791	if (tree->is_negated()) {
1792	// A quick check uses multi-character mask and compare. There is no
1793	// useful way to incorporate a negative char class into this scheme
1794	// so we just conservatively create a mask and value that will always
1795	// succeed.
1796	pos->mask = `0`;
1797	pos->value = `0`;
1798	} else {
1799	intptr_t first_range = `0`;
1800	while (ranges->At(index: first_range).from() > char_mask) {
1801	first_range++;
1802	if (first_range == ranges->length()) {
1803	details->set_cannot_match();
1804	pos->determines_perfectly = false;
1805	return;
1806	}
1807	}
1808	CharacterRange range = ranges->At(index: first_range);
1809	uint16_t from = range.from();
1810	uint16_t to = range.to();
1811	if (to > char_mask) {
1812	to = char_mask;
1813	}
1814	uint32_t differing_bits = (from ^ to);
1815	// A mask and compare is only perfect if the differing bits form a
1816	// number like 00011111 with one single block of trailing 1s.
1817	if ((differing_bits & (differing_bits + `1`)) == `0` &&
1818	from + differing_bits == to) {
1819	pos->determines_perfectly = true;
1820	}
1821	uint32_t common_bits = ~SmearBitsRight(v: differing_bits);
1822	uint32_t bits = (from & common_bits);
1823	for (intptr_t i = first_range + `1`; i < ranges->length(); i++) {
1824	CharacterRange range = ranges->At(index: i);
1825	uint16_t from = range.from();
1826	uint16_t to = range.to();
1827	if (from > char_mask) continue;
1828	if (to > char_mask) to = char_mask;
1829	// Here we are combining more ranges into the mask and compare
1830	// value. With each new range the mask becomes more sparse and
1831	// so the chances of a false positive rise. A character class
1832	// with multiple ranges is assumed never to be equivalent to a
1833	// mask and compare operation.
1834	pos->determines_perfectly = false;
1835	uint32_t new_common_bits = (from ^ to);
1836	new_common_bits = ~SmearBitsRight(v: new_common_bits);
1837	common_bits &= new_common_bits;
1838	bits &= new_common_bits;
1839	uint32_t differing_bits = (from & common_bits) ^ bits;
1840	common_bits ^= differing_bits;
1841	bits &= common_bits;
1842	}
1843	pos->mask = common_bits;
1844	pos->value = bits;
1845	}
1846	characters_filled_in++;
1847	ASSERT(characters_filled_in <= details->characters());
1848	if (characters_filled_in == details->characters()) {
1849	return;
1850	}
1851	}
1852	}
1853	ASSERT(characters_filled_in != details->characters());
1854	if (!details->cannot_match()) {
1855	on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1856	not_at_start: true);
1857	}
1858	}
1859
1860	void QuickCheckDetails::Clear() {
1861	for (int i = `0`; i < characters_; i++) {
1862	positions_[i].mask = `0`;
1863	positions_[i].value = `0`;
1864	positions_[i].determines_perfectly = false;
1865	}
1866	characters_ = `0`;
1867	}
1868
1869	void QuickCheckDetails::Advance(intptr_t by, bool one_byte) {
1870	if (by >= characters_ \|\| by < `0`) {
1871	// check that by < 0 => characters_ == 0
1872	ASSERT(by >= `0` \|\| characters_ == `0`);
1873	Clear();
1874	return;
1875	}
1876	for (intptr_t i = `0`; i < characters_ - by; i++) {
1877	positions_[i] = positions_[by + i];
1878	}
1879	for (intptr_t i = characters_ - by; i < characters_; i++) {
1880	positions_[i].mask = `0`;
1881	positions_[i].value = `0`;
1882	positions_[i].determines_perfectly = false;
1883	}
1884	characters_ -= by;
1885	// We could change mask_ and value_ here but we would never advance unless
1886	// they had already been used in a check and they won't be used again because
1887	// it would gain us nothing. So there's no point.
1888	}
1889
1890	void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) {
1891	ASSERT(characters_ == other->characters_);
1892	if (other->cannot_match_) {
1893	return;
1894	}
1895	if (cannot_match_) {
1896	*this = *other;
1897	return;
1898	}
1899	for (intptr_t i = from_index; i < characters_; i++) {
1900	QuickCheckDetails::Position* pos = positions(index: i);
1901	QuickCheckDetails::Position* other_pos = other->positions(index: i);
1902	if (pos->mask != other_pos->mask \|\| pos->value != other_pos->value \|\|
1903	!other_pos->determines_perfectly) {
1904	// Our mask-compare operation will be approximate unless we have the
1905	// exact same operation on both sides of the alternation.
1906	pos->determines_perfectly = false;
1907	}
1908	pos->mask &= other_pos->mask;
1909	pos->value &= pos->mask;
1910	other_pos->value &= pos->mask;
1911	uint16_t differing_bits = (pos->value ^ other_pos->value);
1912	pos->mask &= ~differing_bits;
1913	pos->value &= pos->mask;
1914	}
1915	}
1916
1917	class VisitMarker : public ValueObject {
1918	public:
1919	explicit VisitMarker(NodeInfo* info) : info_(info) {
1920	ASSERT(!info->visited);
1921	info->visited = true;
1922	}
1923	~VisitMarker() { info_->visited = false; }
1924
1925	private:
1926	NodeInfo* info_;
1927	};
1928
1929	RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth) {
1930	if (info()->replacement_calculated) return replacement();
1931	if (depth < `0`) return this;
1932	ASSERT(!info()->visited);
1933	VisitMarker marker(info());
1934	return FilterSuccessor(depth: depth - `1`);
1935	}
1936
1937	RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth) {
1938	RegExpNode* next = on_success_->FilterOneByte(depth: depth - `1`);
1939	if (next == nullptr) return set_replacement(nullptr);
1940	on_success_ = next;
1941	return set_replacement(this);
1942	}
1943
1944	// We need to check for the following characters: 0x39c 0x3bc 0x178.
1945	static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
1946	// TODO(dcarney): this could be a lot more efficient.
1947	return range.Contains(i: `0x39c`) \|\| range.Contains(i: `0x3bc`) \|\|
1948	range.Contains(i: `0x178`);
1949	}
1950
1951	static bool RangesContainLatin1Equivalents(
1952	ZoneGrowableArray<CharacterRange>* ranges) {
1953	for (intptr_t i = `0`; i < ranges->length(); i++) {
1954	// TODO(dcarney): this could be a lot more efficient.
1955	if (RangeContainsLatin1Equivalents(range: ranges->At(index: i))) return true;
1956	}
1957	return false;
1958	}
1959
1960	static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) {
1961	ASSERT(c > Symbols::kMaxOneCharCodeSymbol);
1962	switch (c) {
1963	// This are equivalent characters in unicode.
1964	case `0x39c`:
1965	case `0x3bc`:
1966	return `0xb5`;
1967	// This is an uppercase of a Latin-1 character
1968	// outside of Latin-1.
1969	case `0x178`:
1970	return `0xff`;
1971	}
1972	return `0`;
1973	}
1974
1975	RegExpNode* TextNode::FilterOneByte(intptr_t depth) {
1976	if (info()->replacement_calculated) return replacement();
1977	if (depth < `0`) return this;
1978	ASSERT(!info()->visited);
1979	VisitMarker marker(info());
1980	intptr_t element_count = elms_->length();
1981	for (intptr_t i = `0`; i < element_count; i++) {
1982	TextElement elm = elms_->At(index: i);
1983	if (elm.text_type() == TextElement::ATOM) {
1984	ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
1985	for (intptr_t j = `0`; j < quarks->length(); j++) {
1986	uint16_t c = quarks->At(index: j);
1987	if (c <= Symbols::kMaxOneCharCodeSymbol) continue;
1988	if (!elm.atom()->ignore_case()) return set_replacement(nullptr);
1989	// Here, we need to check for characters whose upper and lower cases
1990	// are outside the Latin-1 range.
1991	uint16_t converted = ConvertNonLatin1ToLatin1(c);
1992	// Character is outside Latin-1 completely
1993	if (converted == `0`) return set_replacement(nullptr);
1994	// Convert quark to Latin-1 in place.
1995	(*quarks)[`0`] = converted;
1996	}
1997	} else {
1998	ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
1999	RegExpCharacterClass* cc = elm.char_class();
2000	ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
2001	if (!CharacterRange::IsCanonical(ranges)) {
2002	CharacterRange::Canonicalize(ranges);
2003	}
2004	// Now they are in order so we only need to look at the first.
2005	intptr_t range_count = ranges->length();
2006	if (cc->is_negated()) {
2007	if (range_count != `0` && ranges->At(index: `0`).from() == `0` &&
2008	ranges->At(index: `0`).to() >= Symbols::kMaxOneCharCodeSymbol) {
2009	// This will be handled in a later filter.
2010	if (cc->flags().IgnoreCase() &&
2011	RangesContainLatin1Equivalents(ranges)) {
2012	continue;
2013	}
2014	return set_replacement(nullptr);
2015	}
2016	} else {
2017	if (range_count == `0` \|\|
2018	ranges->At(index: `0`).from() > Symbols::kMaxOneCharCodeSymbol) {
2019	// This will be handled in a later filter.
2020	if (cc->flags().IgnoreCase() &&
2021	RangesContainLatin1Equivalents(ranges))
2022	continue;
2023	return set_replacement(nullptr);
2024	}
2025	}
2026	}
2027	}
2028	return FilterSuccessor(depth: depth - `1`);
2029	}
2030
2031	RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth) {
2032	if (info()->replacement_calculated) return replacement();
2033	if (depth < `0`) return this;
2034	if (info()->visited) return this;
2035	{
2036	VisitMarker marker(info());
2037
2038	RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth: depth - `1`);
2039	// If we can't continue after the loop then there is no sense in doing the
2040	// loop.
2041	if (continue_replacement == nullptr) return set_replacement(nullptr);
2042	}
2043
2044	return ChoiceNode::FilterOneByte(depth: depth - `1`);
2045	}
2046
2047	RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth) {
2048	if (info()->replacement_calculated) return replacement();
2049	if (depth < `0`) return this;
2050	if (info()->visited) return this;
2051	VisitMarker marker(info());
2052	intptr_t choice_count = alternatives_->length();
2053
2054	for (intptr_t i = `0`; i < choice_count; i++) {
2055	GuardedAlternative alternative = alternatives_->At(index: i);
2056	if (alternative.guards() != nullptr &&
2057	alternative.guards()->length() != `0`) {
2058	set_replacement(this);
2059	return this;
2060	}
2061	}
2062
2063	intptr_t surviving = `0`;
2064	RegExpNode* survivor = nullptr;
2065	for (intptr_t i = `0`; i < choice_count; i++) {
2066	GuardedAlternative alternative = alternatives_->At(index: i);
2067	RegExpNode* replacement = alternative.node()->FilterOneByte(depth: depth - `1`);
2068	ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.
2069	if (replacement != nullptr) {
2070	(*alternatives_)[i].set_node(replacement);
2071	surviving++;
2072	survivor = replacement;
2073	}
2074	}
2075	if (surviving < `2`) return set_replacement(survivor);
2076
2077	set_replacement(this);
2078	if (surviving == choice_count) {
2079	return this;
2080	}
2081	// Only some of the nodes survived the filtering. We need to rebuild the
2082	// alternatives list.
2083	ZoneGrowableArray<GuardedAlternative>* new_alternatives =
2084	new (Z) ZoneGrowableArray<GuardedAlternative>(surviving);
2085	for (intptr_t i = `0`; i < choice_count; i++) {
2086	RegExpNode* replacement =
2087	(*alternatives_)[i].node()->FilterOneByte(depth: depth - `1`);
2088	if (replacement != nullptr) {
2089	(*alternatives_)[i].set_node(replacement);
2090	new_alternatives->Add(value: (*alternatives_)[i]);
2091	}
2092	}
2093	alternatives_ = new_alternatives;
2094	return this;
2095	}
2096
2097	RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(intptr_t depth) {
2098	if (info()->replacement_calculated) return replacement();
2099	if (depth < `0`) return this;
2100	if (info()->visited) return this;
2101	VisitMarker marker(info());
2102	// Alternative 0 is the negative lookahead, alternative 1 is what comes
2103	// afterwards.
2104	RegExpNode* node = (*alternatives_)[`1`].node();
2105	RegExpNode* replacement = node->FilterOneByte(depth: depth - `1`);
2106	if (replacement == nullptr) return set_replacement(nullptr);
2107	(*alternatives_)[`1`].set_node(replacement);
2108
2109	RegExpNode* neg_node = (*alternatives_)[`0`].node();
2110	RegExpNode* neg_replacement = neg_node->FilterOneByte(depth: depth - `1`);
2111	// If the negative lookahead is always going to fail then
2112	// we don't need to check it.
2113	if (neg_replacement == nullptr) return set_replacement(replacement);
2114	(*alternatives_)[`0`].set_node(neg_replacement);
2115	return set_replacement(this);
2116	}
2117
2118	void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2119	RegExpCompiler* compiler,
2120	intptr_t characters_filled_in,
2121	bool not_at_start) {
2122	if (body_can_be_zero_length_ \|\| info()->visited) return;
2123	VisitMarker marker(info());
2124	return ChoiceNode::GetQuickCheckDetails(details, compiler,
2125	characters_filled_in, not_at_start);
2126	}
2127
2128	void LoopChoiceNode::FillInBMInfo(intptr_t offset,
2129	intptr_t budget,
2130	BoyerMooreLookahead* bm,
2131	bool not_at_start) {
2132	if (body_can_be_zero_length_ \|\| budget <= `0`) {
2133	bm->SetRest(offset);
2134	SaveBMInfo(bm, not_at_start, offset);
2135	return;
2136	}
2137	ChoiceNode::FillInBMInfo(offset, budget: budget - `1`, bm, not_at_start);
2138	SaveBMInfo(bm, not_at_start, offset);
2139	}
2140
2141	void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2142	RegExpCompiler* compiler,
2143	intptr_t characters_filled_in,
2144	bool not_at_start) {
2145	not_at_start = (not_at_start \|\| not_at_start_);
2146	intptr_t choice_count = alternatives_->length();
2147	ASSERT(choice_count > `0`);
2148	(*alternatives_)[`0`].node()->GetQuickCheckDetails(
2149	details, compiler, characters_filled_in, not_at_start);
2150	for (intptr_t i = `1`; i < choice_count; i++) {
2151	QuickCheckDetails new_details(details->characters());
2152	RegExpNode* node = (*alternatives_)[i].node();
2153	node->GetQuickCheckDetails(details: &new_details, compiler, characters_filled_in,
2154	not_at_start);
2155	// Here we merge the quick match details of the two branches.
2156	details->Merge(other: &new_details, from_index: characters_filled_in);
2157	}
2158	}
2159
2160	// Check for [0-9A-Z_a-z].
2161	static void EmitWordCheck(RegExpMacroAssembler* assembler,
2162	BlockLabel* word,
2163	BlockLabel* non_word,
2164	bool fall_through_on_word) {
2165	if (assembler->CheckSpecialCharacterClass(
2166	type: fall_through_on_word ? `'w'` : `'W'`,
2167	on_no_match: fall_through_on_word ? non_word : word)) {
2168	// Optimized implementation available.
2169	return;
2170	}
2171	assembler->CheckCharacterGT(limit: `'z'`, on_greater: non_word);
2172	assembler->CheckCharacterLT(limit: `'0'`, on_less: non_word);
2173	assembler->CheckCharacterGT(limit: `'a'` - `1`, on_greater: word);
2174	assembler->CheckCharacterLT(limit: `'9'` + `1`, on_less: word);
2175	assembler->CheckCharacterLT(limit: `'A'`, on_less: non_word);
2176	assembler->CheckCharacterLT(limit: `'Z'` + `1`, on_less: word);
2177	if (fall_through_on_word) {
2178	assembler->CheckNotCharacter(c: `'_'`, on_not_equal: non_word);
2179	} else {
2180	assembler->CheckCharacter(c: `'_'`, on_equal: word);
2181	}
2182	}
2183
2184	// Emit the code to check for a ^ in multiline mode (1-character lookbehind
2185	// that matches newline or the start of input).
2186	static void EmitHat(RegExpCompiler* compiler,
2187	RegExpNode* on_success,
2188	Trace* trace) {
2189	RegExpMacroAssembler* assembler = compiler->macro_assembler();
2190	// We will be loading the previous character into the current character
2191	// register.
2192	Trace new_trace(*trace);
2193	new_trace.InvalidateCurrentCharacter();
2194
2195	BlockLabel ok;
2196	if (new_trace.cp_offset() == `0`) {
2197	// The start of input counts as a newline in this context, so skip to
2198	// ok if we are at the start.
2199	assembler->CheckAtStart(on_at_start: &ok);
2200	}
2201	// We already checked that we are not at the start of input so it must be
2202	// OK to load the previous character.
2203	assembler->LoadCurrentCharacter(cp_offset: new_trace.cp_offset() - `1`,
2204	on_end_of_input: new_trace.backtrack(), check_bounds: false);
2205	if (!assembler->CheckSpecialCharacterClass(type: `'n'`, on_no_match: new_trace.backtrack())) {
2206	// Newline means \n, \r, 0x2028 or 0x2029.
2207	if (!compiler->one_byte()) {
2208	assembler->CheckCharacterAfterAnd(c: `0x2028`, and_with: `0xfffe`, on_equal: &ok);
2209	}
2210	assembler->CheckCharacter(c: `'\n'`, on_equal: &ok);
2211	assembler->CheckNotCharacter(c: `'\r'`, on_not_equal: new_trace.backtrack());
2212	}
2213	assembler->BindBlock(label: &ok);
2214	on_success->Emit(compiler, trace: &new_trace);
2215	}
2216
2217	// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2218	void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2219	RegExpMacroAssembler* assembler = compiler->macro_assembler();
2220	Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2221	bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2222	BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2223	if (lookahead == nullptr) {
2224	intptr_t eats_at_least =
2225	Utils::Minimum(x: kMaxLookaheadForBoyerMoore,
2226	y: EatsAtLeast(still_to_find: kMaxLookaheadForBoyerMoore, budget: kRecursionBudget,
2227	not_at_start));
2228	if (eats_at_least >= `1`) {
2229	BoyerMooreLookahead* bm =
2230	new (Z) BoyerMooreLookahead (eats_at_least, compiler, Z);
2231	FillInBMInfo(offset: `0`, budget: kRecursionBudget, bm, not_at_start);
2232	if (bm->at(i: `0`)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
2233	if (bm->at(i: `0`)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2234	}
2235	} else {
2236	if (lookahead->at(i: `0`)->is_non_word())
2237	next_is_word_character = Trace::FALSE_VALUE;
2238	if (lookahead->at(i: `0`)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2239	}
2240	bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2241	if (next_is_word_character == Trace::UNKNOWN) {
2242	BlockLabel before_non_word;
2243	BlockLabel before_word;
2244	if (trace->characters_preloaded() != `1`) {
2245	assembler->LoadCurrentCharacter(cp_offset: trace->cp_offset(), on_end_of_input: &before_non_word);
2246	}
2247	// Fall through on non-word.
2248	EmitWordCheck(assembler, word: &before_word, non_word: &before_non_word, fall_through_on_word: false);
2249	// Next character is not a word character.
2250	assembler->BindBlock(label: &before_non_word);
2251	BlockLabel ok;
2252	// Backtrack on \B (non-boundary check) if previous is a word,
2253	// since we know next is not* a word and this would be a boundary.*
2254	BacktrackIfPrevious(compiler, trace, backtrack_if_previous: at_boundary ? kIsNonWord : kIsWord);
2255
2256	if (!assembler->IsClosed()) {
2257	assembler->GoTo(to: &ok);
2258	}
2259
2260	assembler->BindBlock(label: &before_word);
2261	BacktrackIfPrevious(compiler, trace, backtrack_if_previous: at_boundary ? kIsWord : kIsNonWord);
2262	assembler->BindBlock(label: &ok);
2263	} else if (next_is_word_character == Trace::TRUE_VALUE) {
2264	BacktrackIfPrevious(compiler, trace, backtrack_if_previous: at_boundary ? kIsWord : kIsNonWord);
2265	} else {
2266	ASSERT(next_is_word_character == Trace::FALSE_VALUE);
2267	BacktrackIfPrevious(compiler, trace, backtrack_if_previous: at_boundary ? kIsNonWord : kIsWord);
2268	}
2269	}
2270
2271	void AssertionNode::BacktrackIfPrevious(
2272	RegExpCompiler* compiler,
2273	Trace* trace,
2274	AssertionNode::IfPrevious backtrack_if_previous) {
2275	RegExpMacroAssembler* assembler = compiler->macro_assembler();
2276	Trace new_trace(*trace);
2277	new_trace.InvalidateCurrentCharacter();
2278
2279	BlockLabel fall_through, dummy;
2280
2281	BlockLabel* non_word = backtrack_if_previous == kIsNonWord
2282	? new_trace.backtrack()
2283	: &fall_through;
2284	BlockLabel* word = backtrack_if_previous == kIsNonWord
2285	? &fall_through
2286	: new_trace.backtrack();
2287
2288	if (new_trace.cp_offset() == `0`) {
2289	// The start of input counts as a non-word character, so the question is
2290	// decided if we are at the start.
2291	assembler->CheckAtStart(on_at_start: non_word);
2292	}
2293	// We already checked that we are not at the start of input so it must be
2294	// OK to load the previous character.
2295	assembler->LoadCurrentCharacter(cp_offset: new_trace.cp_offset() - `1`, on_end_of_input: &dummy, check_bounds: false);
2296	EmitWordCheck(assembler, word, non_word, fall_through_on_word: backtrack_if_previous == kIsNonWord);
2297
2298	assembler->BindBlock(label: &fall_through);
2299	on_success()->Emit(compiler, trace: &new_trace);
2300	}
2301
2302	void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2303	RegExpCompiler* compiler,
2304	intptr_t filled_in,
2305	bool not_at_start) {
2306	if (assertion_type_ == AT_START && not_at_start) {
2307	details->set_cannot_match();
2308	return;
2309	}
2310	return on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in: filled_in,
2311	not_at_start);
2312	}
2313
2314	void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2315	RegExpMacroAssembler* assembler = compiler->macro_assembler();
2316	switch (assertion_type_) {
2317	case AT_END: {
2318	BlockLabel ok;
2319	assembler->CheckPosition(cp_offset: trace->cp_offset(), on_outside_input: &ok);
2320	assembler->GoTo(to: trace->backtrack());
2321	assembler->BindBlock(label: &ok);
2322	break;
2323	}
2324	case AT_START: {
2325	if (trace->at_start() == Trace::FALSE_VALUE) {
2326	assembler->GoTo(to: trace->backtrack());
2327	return;
2328	}
2329	if (trace->at_start() == Trace::UNKNOWN) {
2330	assembler->CheckNotAtStart(cp_offset: trace->cp_offset(), on_not_at_start: trace->backtrack());
2331	Trace at_start_trace = *trace;
2332	at_start_trace.set_at_start(Trace::TRUE_VALUE);
2333	on_success()->Emit(compiler, trace: &at_start_trace);
2334	return;
2335	}
2336	} break;
2337	case AFTER_NEWLINE:
2338	EmitHat(compiler, on_success: on_success(), trace);
2339	return;
2340	case AT_BOUNDARY:
2341	case AT_NON_BOUNDARY: {
2342	EmitBoundaryCheck(compiler, trace);
2343	return;
2344	}
2345	}
2346	on_success()->Emit(compiler, trace);
2347	}
2348
2349	static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) {
2350	if (quick_check == nullptr) return false;
2351	if (offset >= quick_check->characters()) return false;
2352	return quick_check->positions(index: offset)->determines_perfectly;
2353	}
2354
2355	static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) {
2356	if (index > *checked_up_to) {
2357	*checked_up_to = index;
2358	}
2359	}
2360
2361	// We call this repeatedly to generate code for each pass over the text node.
2362	// The passes are in increasing order of difficulty because we hope one
2363	// of the first passes will fail in which case we are saved the work of the
2364	// later passes. for example for the case independent regexp /%[asdfghjkl]a/
2365	// we will check the '%' in the first pass, the case independent 'a' in the
2366	// second pass and the character class in the last pass.
2367	//
2368	// The passes are done from right to left, so for example to test for /bar/
2369	// we will first test for an 'r' with offset 2, then an 'a' with offset 1
2370	// and then a 'b' with offset 0. This means we can avoid the end-of-input
2371	// bounds check most of the time. In the example we only need to check for
2372	// end-of-input when loading the putative 'r'.
2373	//
2374	// A slight complication involves the fact that the first character may already
2375	// be fetched into a register by the previous node. In this case we want to
2376	// do the test for that character first. We do this in separate passes. The
2377	// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
2378	// pass has been performed then subsequent passes will have true in
2379	// first_element_checked to indicate that character does not need to be
2380	// checked again.
2381	//
2382	// In addition to all this we are passed a Trace, which can
2383	// contain an AlternativeGeneration object. In this AlternativeGeneration
2384	// object we can see details of any quick check that was already passed in
2385	// order to get to the code we are now generating. The quick check can involve
2386	// loading characters, which means we do not need to recheck the bounds
2387	// up to the limit the quick check already checked. In addition the quick
2388	// check can have involved a mask and compare operation which may simplify
2389	// or obviate the need for further checks at some character positions.
2390	void TextNode::TextEmitPass(RegExpCompiler* compiler,
2391	TextEmitPassType pass,
2392	bool preloaded,
2393	Trace* trace,
2394	bool first_element_checked,
2395	intptr_t* checked_up_to) {
2396	RegExpMacroAssembler* assembler = compiler->macro_assembler();
2397	bool one_byte = compiler->one_byte();
2398	BlockLabel* backtrack = trace->backtrack();
2399	QuickCheckDetails* quick_check = trace->quick_check_performed();
2400	intptr_t element_count = elms_->length();
2401	intptr_t backward_offset = read_backward() ? -Length() : `0`;
2402	for (intptr_t i = preloaded ? `0` : element_count - `1`; i >= `0`; i--) {
2403	TextElement elm = elms_->At(index: i);
2404	intptr_t cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
2405	if (elm.text_type() == TextElement::ATOM) {
2406	ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
2407	for (intptr_t j = preloaded ? `0` : quarks->length() - `1`; j >= `0`; j--) {
2408	if (SkipPass(pass, ignore_case: elm.atom()->ignore_case())) continue;
2409	if (first_element_checked && i == `0` && j == `0`) continue;
2410	if (DeterminedAlready(quick_check, offset: elm.cp_offset() + j)) continue;
2411	EmitCharacterFunction* emit_function = nullptr;
2412	uint16_t quark = quarks->At(index: j);
2413	if (elm.atom()->ignore_case()) {
2414	// Everywhere else we assume that a non-Latin-1 character cannot match
2415	// a Latin-1 character. Avoid the cases where this is assumption is
2416	// invalid by using the Latin1 equivalent instead.
2417	quark = Latin1::TryConvertToLatin1(c: quark);
2418	}
2419	switch (pass) {
2420	case NON_LATIN1_MATCH:
2421	ASSERT(one_byte);
2422	if (quark > Symbols::kMaxOneCharCodeSymbol) {
2423	assembler->GoTo(to: backtrack);
2424	return;
2425	}
2426	break;
2427	case NON_LETTER_CHARACTER_MATCH:
2428	emit_function = &EmitAtomNonLetter;
2429	break;
2430	case SIMPLE_CHARACTER_MATCH:
2431	emit_function = &EmitSimpleCharacter;
2432	break;
2433	case CASE_CHARACTER_MATCH:
2434	emit_function = &EmitAtomLetter;
2435	break;
2436	default:
2437	break;
2438	}
2439	if (emit_function != nullptr) {
2440	const bool bounds_check =
2441	*checked_up_to < (cp_offset + j) \|\| read_backward();
2442	bool bound_checked =
2443	emit_function(Z, compiler, quarks->At(index: j), backtrack,
2444	cp_offset + j, bounds_check, preloaded);
2445	if (bound_checked) UpdateBoundsCheck(index: cp_offset + j, checked_up_to);
2446	}
2447	}
2448	} else {
2449	ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
2450	if (pass == CHARACTER_CLASS_MATCH) {
2451	if (first_element_checked && i == `0`) continue;
2452	if (DeterminedAlready(quick_check, offset: elm.cp_offset())) continue;
2453	RegExpCharacterClass* cc = elm.char_class();
2454	bool bounds_check = *checked_up_to < cp_offset \|\| read_backward();
2455	EmitCharClass(macro_assembler: assembler, cc, one_byte, on_failure: backtrack, cp_offset,
2456	check_offset: bounds_check, preloaded, Z);
2457	UpdateBoundsCheck(index: cp_offset, checked_up_to);
2458	}
2459	}
2460	}
2461	}
2462
2463	intptr_t TextNode::Length() {
2464	TextElement elm = elms_->Last();
2465	ASSERT(elm.cp_offset() >= `0`);
2466	return elm.cp_offset() + elm.length();
2467	}
2468
2469	bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) {
2470	TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass);
2471	if (ignore_case) {
2472	return pass == SIMPLE_CHARACTER_MATCH;
2473	} else {
2474	return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;
2475	}
2476	}
2477
2478	TextNode* TextNode::CreateForCharacterRanges(
2479	ZoneGrowableArray<CharacterRange>* ranges,
2480	bool read_backward,
2481	RegExpNode* on_success,
2482	RegExpFlags flags) {
2483	ASSERT(ranges != nullptr);
2484	ZoneGrowableArray<TextElement>* elms = new ZoneGrowableArray<TextElement>(`1`);
2485	elms->Add(value: TextElement::CharClass(char_class: new RegExpCharacterClass (ranges, flags)));
2486	return new TextNode (elms, read_backward, on_success);
2487	}
2488
2489	TextNode* TextNode::CreateForSurrogatePair(CharacterRange lead,
2490	CharacterRange trail,
2491	bool read_backward,
2492	RegExpNode* on_success,
2493	RegExpFlags flags) {
2494	auto lead_ranges = CharacterRange::List(zone: on_success->zone(), range: lead);
2495	auto trail_ranges = CharacterRange::List(zone: on_success->zone(), range: trail);
2496	auto elms = new ZoneGrowableArray<TextElement>(`2`);
2497
2498	elms->Add(
2499	value: TextElement::CharClass(char_class: new RegExpCharacterClass (lead_ranges, flags)));
2500	elms->Add(
2501	value: TextElement::CharClass(char_class: new RegExpCharacterClass (trail_ranges, flags)));
2502
2503	return new TextNode (elms, read_backward, on_success);
2504	}
2505
2506	// This generates the code to match a text node. A text node can contain
2507	// straight character sequences (possibly to be matched in a case-independent
2508	// way) and character classes. For efficiency we do not do this in a single
2509	// pass from left to right. Instead we pass over the text node several times,
2510	// emitting code for some character positions every time. See the comment on
2511	// TextEmitPass for details.
2512	void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2513	LimitResult limit_result = LimitVersions(compiler, trace);
2514	if (limit_result == DONE) return;
2515	ASSERT(limit_result == CONTINUE);
2516
2517	if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2518	compiler->SetRegExpTooBig();
2519	return;
2520	}
2521
2522	if (compiler->one_byte()) {
2523	intptr_t dummy = `0`;
2524	TextEmitPass(compiler, pass: NON_LATIN1_MATCH, preloaded: false, trace, first_element_checked: false, checked_up_to: &dummy);
2525	}
2526
2527	bool first_elt_done = false;
2528	intptr_t bound_checked_to = trace->cp_offset() - `1`;
2529	bound_checked_to += trace->bound_checked_up_to();
2530
2531	// If a character is preloaded into the current character register then
2532	// check that now.
2533	if (trace->characters_preloaded() == `1`) {
2534	for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
2535	TextEmitPass(compiler, pass: static_cast<TextEmitPassType>(pass), preloaded: true, trace,
2536	first_element_checked: false, checked_up_to: &bound_checked_to);
2537	}
2538	first_elt_done = true;
2539	}
2540
2541	for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
2542	TextEmitPass(compiler, pass: static_cast<TextEmitPassType>(pass), preloaded: false, trace,
2543	first_element_checked: first_elt_done, checked_up_to: &bound_checked_to);
2544	}
2545
2546	Trace successor_trace(*trace);
2547	// If we advance backward, we may end up at the start.
2548	successor_trace.AdvanceCurrentPositionInTrace(
2549	by: read_backward() ? -Length() : Length(), compiler);
2550	successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
2551	: Trace::FALSE_VALUE);
2552	RecursionCheck rc(compiler);
2553	on_success()->Emit(compiler, trace: &successor_trace);
2554	}
2555
2556	void Trace::InvalidateCurrentCharacter() {
2557	characters_preloaded_ = `0`;
2558	}
2559
2560	void Trace::AdvanceCurrentPositionInTrace(intptr_t by,
2561	RegExpCompiler* compiler) {
2562	// We don't have an instruction for shifting the current character register
2563	// down or for using a shifted value for anything so lets just forget that
2564	// we preloaded any characters into it.
2565	characters_preloaded_ = `0`;
2566	// Adjust the offsets of the quick check performed information. This
2567	// information is used to find out what we already determined about the
2568	// characters by means of mask and compare.
2569	quick_check_performed_.Advance(by, one_byte: compiler->one_byte());
2570	cp_offset_ += by;
2571	if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2572	compiler->SetRegExpTooBig();
2573	cp_offset_ = `0`;
2574	}
2575	bound_checked_up_to_ =
2576	Utils::Maximum(x: static_cast<intptr_t>(`0`), y: bound_checked_up_to_ - by);
2577	}
2578
2579	void TextNode::MakeCaseIndependent(bool is_one_byte) {
2580	intptr_t element_count = elms_->length();
2581	for (intptr_t i = `0`; i < element_count; i++) {
2582	TextElement elm = elms_->At(index: i);
2583	if (elm.text_type() == TextElement::CHAR_CLASS) {
2584	RegExpCharacterClass* cc = elm.char_class();
2585	bool case_equivalents_already_added =
2586	cc->flags().NeedsUnicodeCaseEquivalents();
2587	if (cc->flags().IgnoreCase() && !case_equivalents_already_added) {
2588	// None of the standard character classes is different in the case
2589	// independent case and it slows us down if we don't know that.
2590	if (cc->is_standard()) continue;
2591	CharacterRange::AddCaseEquivalents(ranges: cc->ranges(), is_one_byte, Z);
2592	}
2593	}
2594	}
2595	}
2596
2597	intptr_t TextNode::GreedyLoopTextLength() {
2598	TextElement elm = elms_->At(index: elms_->length() - `1`);
2599	return elm.cp_offset() + elm.length();
2600	}
2601
2602	RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2603	RegExpCompiler* compiler) {
2604	if (read_backward()) return nullptr;
2605	if (elms_->length() != `1`) return nullptr;
2606	TextElement elm = elms_->At(index: `0`);
2607	if (elm.text_type() != TextElement::CHAR_CLASS) return nullptr;
2608	RegExpCharacterClass* node = elm.char_class();
2609	ZoneGrowableArray<CharacterRange>* ranges = node->ranges();
2610	if (!CharacterRange::IsCanonical(ranges)) {
2611	CharacterRange::Canonicalize(ranges);
2612	}
2613	if (node->is_negated()) {
2614	return ranges->length() == `0` ? on_success() : nullptr;
2615	}
2616	if (ranges->length() != `1`) return nullptr;
2617	uint32_t max_char;
2618	if (compiler->one_byte()) {
2619	max_char = Symbols::kMaxOneCharCodeSymbol;
2620	} else {
2621	max_char = Utf16::kMaxCodeUnit;
2622	}
2623	return ranges->At(index: `0`).IsEverything(max: max_char) ? on_success() : nullptr;
2624	}
2625
2626	// Finds the fixed match length of a sequence of nodes that goes from
2627	// this alternative and back to this choice node. If there are variable
2628	// length nodes or other complications in the way then return a sentinel
2629	// value indicating that a greedy loop cannot be constructed.
2630	intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative(
2631	const GuardedAlternative* alternative) {
2632	intptr_t length = `0`;
2633	RegExpNode* node = alternative->node();
2634	// Later we will generate code for all these text nodes using recursion
2635	// so we have to limit the max number.
2636	intptr_t recursion_depth = `0`;
2637	while (node != this) {
2638	if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2639	return kNodeIsTooComplexForGreedyLoops;
2640	}
2641	intptr_t node_length = node->GreedyLoopTextLength();
2642	if (node_length == kNodeIsTooComplexForGreedyLoops) {
2643	return kNodeIsTooComplexForGreedyLoops;
2644	}
2645	length += node_length;
2646	SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2647	node = seq_node->on_success();
2648	}
2649	return read_backward() ? -length : length;
2650	}
2651
2652	void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2653	ASSERT(loop_node_ == nullptr);
2654	AddAlternative(node: alt);
2655	loop_node_ = alt.node();
2656	}
2657
2658	void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2659	ASSERT(continue_node_ == nullptr);
2660	AddAlternative(node: alt);
2661	continue_node_ = alt.node();
2662	}
2663
2664	void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2665	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2666	if (trace->stop_node() == this) {
2667	// Back edge of greedy optimized loop node graph.
2668	intptr_t text_length =
2669	GreedyLoopTextLengthForAlternative(alternative: &alternatives_->At(index: `0`));
2670	ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
2671	// Update the counter-based backtracking info on the stack. This is an
2672	// optimization for greedy loops (see below).
2673	ASSERT(trace->cp_offset() == text_length);
2674	macro_assembler->AdvanceCurrentPosition(by: text_length);
2675	macro_assembler->GoTo(to: trace->loop_label());
2676	return;
2677	}
2678	ASSERT(trace->stop_node() == nullptr);
2679	if (!trace->is_trivial()) {
2680	trace->Flush(compiler, successor: this);
2681	return;
2682	}
2683	ChoiceNode::Emit(compiler, trace);
2684	}
2685
2686	intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2687	intptr_t eats_at_least) {
2688	intptr_t preload_characters =
2689	Utils::Minimum(x: static_cast<intptr_t>(`4`), y: eats_at_least);
2690	if (compiler->one_byte()) {
2691	#if !defined(DART_COMPRESSED_POINTERS) && !defined(TARGET_ARCH_RISCV32)
2692	if (preload_characters > `4`) preload_characters = `4`;
2693	// We can't preload 3 characters because there is no machine instruction
2694	// to do that. We can't just load 4 because we could be reading
2695	// beyond the end of the string, which could cause a memory fault.
2696	if (preload_characters == `3`) preload_characters = `2`;
2697	#else
2698	// Ensure LoadCodeUnitsInstr can always produce a Smi. See
2699	// https://github.com/dart-lang/sdk/issues/29951
2700	if (preload_characters > `2`) preload_characters = `2`;
2701	#endif
2702	} else {
2703	#if !defined(DART_COMPRESSED_POINTERS) && !defined(TARGET_ARCH_RISCV32)
2704	if (preload_characters > `2`) preload_characters = `2`;
2705	#else
2706	// Ensure LoadCodeUnitsInstr can always produce a Smi. See
2707	// https://github.com/dart-lang/sdk/issues/29951
2708	if (preload_characters > `1`) preload_characters = `1`;
2709	#endif
2710	}
2711	if (!compiler->macro_assembler()->CanReadUnaligned()) {
2712	if (preload_characters > `1`) preload_characters = `1`;
2713	}
2714	return preload_characters;
2715	}
2716
2717	// This structure is used when generating the alternatives in a choice node. It
2718	// records the way the alternative is being code generated.
2719	struct AlternativeGeneration {
2720	AlternativeGeneration()
2721	: possible_success (),
2722	expects_preload(false),
2723	after (),
2724	quick_check_details () {}
2725	BlockLabel possible_success;
2726	bool expects_preload;
2727	BlockLabel after;
2728	QuickCheckDetails quick_check_details;
2729	};
2730
2731	// Creates a list of AlternativeGenerations. If the list has a reasonable
2732	// size then it is on the stack, otherwise the excess is on the heap.
2733	class AlternativeGenerationList {
2734	public:
2735	explicit AlternativeGenerationList(intptr_t count) : count_(count) {
2736	ASSERT(count >= `0`);
2737	if (count > kAFew) {
2738	excess_alt_gens_.reset(p: new AlternativeGeneration[count - kAFew]);
2739	}
2740	}
2741
2742	AlternativeGeneration* at(intptr_t i) {
2743	ASSERT(`0` <= i);
2744	ASSERT(i < count_);
2745	if (i < kAFew) {
2746	return &a_few_alt_gens_[i];
2747	}
2748	return &excess_alt_gens_[i - kAFew];
2749	}
2750
2751	private:
2752	static constexpr intptr_t kAFew = `10`;
2753
2754	intptr_t count_;
2755	AlternativeGeneration a_few_alt_gens_[kAFew];
2756	std::unique_ptr<AlternativeGeneration[]> excess_alt_gens_;
2757
2758	DISALLOW_ALLOCATION();
2759	DISALLOW_COPY_AND_ASSIGN(AlternativeGenerationList);
2760	};
2761
2762	static constexpr int32_t kRangeEndMarker = Utf::kMaxCodePoint + `1`;
2763
2764	// The '2' variant is inclusive from and exclusive to.
2765	// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
2766	// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
2767	// 0x180E has been removed from Unicode's Zs category and thus
2768	// from ECMAScript's WhiteSpace category as of Unicode 6.3.
2769	static constexpr int32_t kSpaceRanges[] = {
2770	`'\t'`, `'\r'` + `1`, `' '`, `' '` + `1`, `0x00A0`, `0x00A1`, `0x1680`,
2771	`0x1681`, `0x2000`, `0x200B`, `0x2028`, `0x202A`, `0x202F`, `0x2030`,
2772	`0x205F`, `0x2060`, `0x3000`, `0x3001`, `0xFEFF`, `0xFF00`, kRangeEndMarker};
2773	static constexpr intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
2774	static constexpr int32_t kWordRanges[] = {
2775	`'0'`, `'9'` + `1`, `'A'`, `'Z'` + `1`, `'_'`, `'_'` + `1`, `'a'`, `'z'` + `1`, kRangeEndMarker};
2776	static constexpr intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);
2777	static constexpr int32_t kDigitRanges[] = {`'0'`, `'9'` + `1`, kRangeEndMarker};
2778	static constexpr intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
2779	static constexpr int32_t kSurrogateRanges[] = {`0xd800`, `0xe000`, kRangeEndMarker};
2780	static constexpr intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
2781	static constexpr int32_t kLineTerminatorRanges[] = {
2782	`0x000A`, `0x000B`, `0x000D`, `0x000E`, `0x2028`, `0x202A`, kRangeEndMarker};
2783	static constexpr intptr_t kLineTerminatorRangeCount =
2784	ARRAY_SIZE(kLineTerminatorRanges);
2785
2786	void BoyerMoorePositionInfo::Set(intptr_t character) {
2787	SetInterval(Interval (character, character));
2788	}
2789
2790	void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2791	s_ = AddRange(containment: s_, ranges: kSpaceRanges, ranges_length: kSpaceRangeCount, new_range: interval);
2792	w_ = AddRange(containment: w_, ranges: kWordRanges, ranges_length: kWordRangeCount, new_range: interval);
2793	d_ = AddRange(containment: d_, ranges: kDigitRanges, ranges_length: kDigitRangeCount, new_range: interval);
2794	surrogate_ =
2795	AddRange(containment: surrogate_, ranges: kSurrogateRanges, ranges_length: kSurrogateRangeCount, new_range: interval);
2796	if (interval.to() - interval.from() >= kMapSize - `1`) {
2797	if (map_count_ != kMapSize) {
2798	map_count_ = kMapSize;
2799	for (intptr_t i = `0`; i < kMapSize; i++)
2800	(map_)[i] = true*;
2801	}
2802	return;
2803	}
2804	for (intptr_t i = interval.from(); i <= interval.to(); i++) {
2805	intptr_t mod_character = (i & kMask);
2806	if (!map_->At(index: mod_character)) {
2807	map_count_++;
2808	(map_)[mod_character] = true*;
2809	}
2810	if (map_count_ == kMapSize) return;
2811	}
2812	}
2813
2814	void BoyerMoorePositionInfo::SetAll() {
2815	s_ = w_ = d_ = kLatticeUnknown;
2816	if (map_count_ != kMapSize) {
2817	map_count_ = kMapSize;
2818	for (intptr_t i = `0`; i < kMapSize; i++)
2819	(map_)[i] = true*;
2820	}
2821	}
2822
2823	BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length,
2824	RegExpCompiler* compiler,
2825	Zone* zone)
2826	: length_(length), compiler_(compiler) {
2827	if (compiler->one_byte()) {
2828	max_char_ = Symbols::kMaxOneCharCodeSymbol;
2829	} else {
2830	max_char_ = Utf16::kMaxCodeUnit;
2831	}
2832	bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);
2833	for (intptr_t i = `0`; i < length; i++) {
2834	bitmaps_->Add(value: new (zone) BoyerMoorePositionInfo (zone));
2835	}
2836	}
2837
2838	// Find the longest range of lookahead that has the fewest number of different
2839	// characters that can occur at a given position. Since we are optimizing two
2840	// different parameters at once this is a tradeoff.
2841	bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) {
2842	intptr_t biggest_points = `0`;
2843	// If more than 32 characters out of 128 can occur it is unlikely that we can
2844	// be lucky enough to step forwards much of the time.
2845	const intptr_t kMaxMax = `32`;
2846	for (intptr_t max_number_of_chars = `4`; max_number_of_chars < kMaxMax;
2847	max_number_of_chars *= `2`) {
2848	biggest_points =
2849	FindBestInterval(max_number_of_chars, old_biggest_points: biggest_points, from, to);
2850	}
2851	if (biggest_points == `0`) return false;
2852	return true;
2853	}
2854
2855	// Find the highest-points range between 0 and length_ where the character
2856	// information is not too vague. 'Too vague' means that there are more than
2857	// max_number_of_chars that can occur at this position. Calculates the number
2858	// of points as the product of width-of-the-range and
2859	// probability-of-finding-one-of-the-characters, where the probability is
2860	// calculated using the frequency distribution of the sample subject string.
2861	intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars,
2862	intptr_t old_biggest_points,
2863	intptr_t* from,
2864	intptr_t* to) {
2865	intptr_t biggest_points = old_biggest_points;
2866	static constexpr intptr_t kSize = RegExpMacroAssembler::kTableSize;
2867	for (intptr_t i = `0`; i < length_;) {
2868	while (i < length_ && Count(map_number: i) > max_number_of_chars)
2869	i++;
2870	if (i == length_) break;
2871	intptr_t remembered_from = i;
2872	bool union_map[kSize];
2873	for (intptr_t j = `0`; j < kSize; j++)
2874	union_map[j] = false;
2875	while (i < length_ && Count(map_number: i) <= max_number_of_chars) {
2876	BoyerMoorePositionInfo* map = bitmaps_->At(index: i);
2877	for (intptr_t j = `0`; j < kSize; j++)
2878	union_map[j] \|= map->at(i: j);
2879	i++;
2880	}
2881	intptr_t frequency = `0`;
2882	for (intptr_t j = `0`; j < kSize; j++) {
2883	if (union_map[j]) {
2884	// Add 1 to the frequency to give a small per-character boost for
2885	// the cases where our sampling is not good enough and many
2886	// characters have a frequency of zero. This means the frequency
2887	// can theoretically be up to 2kSize though we treat it mostly as*
2888	// a fraction of kSize.
2889	frequency += compiler_->frequency_collator()->Frequency(in_character: j) + `1`;
2890	}
2891	}
2892	// We use the probability of skipping times the distance we are skipping to
2893	// judge the effectiveness of this. Actually we have a cut-off: By
2894	// dividing by 2 we switch off the skipping if the probability of skipping
2895	// is less than 50%. This is because the multibyte mask-and-compare
2896	// skipping in quickcheck is more likely to do well on this case.
2897	bool in_quickcheck_range =
2898	((i - remembered_from < `4`) \|\|
2899	(compiler_->one_byte() ? remembered_from <= `4` : remembered_from <= `2`));
2900	// Called 'probability' but it is only a rough estimate and can actually
2901	// be outside the 0-kSize range.
2902	intptr_t probability =
2903	(in_quickcheck_range ? kSize / `2` : kSize) - frequency;
2904	intptr_t points = (i - remembered_from) * probability;
2905	if (points > biggest_points) {
2906	*from = remembered_from;
2907	*to = i - `1`;
2908	biggest_points = points;
2909	}
2910	}
2911	return biggest_points;
2912	}
2913
2914	// Take all the characters that will not prevent a successful match if they
2915	// occur in the subject string in the range between min_lookahead and
2916	// max_lookahead (inclusive) measured from the current position. If the
2917	// character at max_lookahead offset is not one of these characters, then we
2918	// can safely skip forwards by the number of characters in the range.
2919	intptr_t BoyerMooreLookahead::GetSkipTable(
2920	intptr_t min_lookahead,
2921	intptr_t max_lookahead,
2922	const TypedData& boolean_skip_table) {
2923	const intptr_t kSize = RegExpMacroAssembler::kTableSize;
2924
2925	const intptr_t kSkipArrayEntry = `0`;
2926	const intptr_t kDontSkipArrayEntry = `1`;
2927
2928	for (intptr_t i = `0`; i < kSize; i++) {
2929	boolean_skip_table.SetUint8(byte_offset: i, value: kSkipArrayEntry);
2930	}
2931	intptr_t skip = max_lookahead + `1` - min_lookahead;
2932
2933	for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {
2934	BoyerMoorePositionInfo* map = bitmaps_->At(index: i);
2935	for (intptr_t j = `0`; j < kSize; j++) {
2936	if (map->at(i: j)) {
2937	boolean_skip_table.SetUint8(byte_offset: j, value: kDontSkipArrayEntry);
2938	}
2939	}
2940	}
2941
2942	return skip;
2943	}
2944
2945	// See comment above on the implementation of GetSkipTable.
2946	void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
2947	const intptr_t kSize = RegExpMacroAssembler::kTableSize;
2948
2949	intptr_t min_lookahead = `0`;
2950	intptr_t max_lookahead = `0`;
2951
2952	if (!FindWorthwhileInterval(from: &min_lookahead, to: &max_lookahead)) return;
2953
2954	bool found_single_character = false;
2955	intptr_t single_character = `0`;
2956	for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {
2957	BoyerMoorePositionInfo* map = bitmaps_->At(index: i);
2958	if (map->map_count() > `1` \|\|
2959	(found_single_character && map->map_count() != `0`)) {
2960	found_single_character = false;
2961	break;
2962	}
2963	for (intptr_t j = `0`; j < kSize; j++) {
2964	if (map->at(i: j)) {
2965	found_single_character = true;
2966	single_character = j;
2967	break;
2968	}
2969	}
2970	}
2971
2972	intptr_t lookahead_width = max_lookahead + `1` - min_lookahead;
2973
2974	if (found_single_character && lookahead_width == `1` && max_lookahead < `3`) {
2975	// The mask-compare can probably handle this better.
2976	return;
2977	}
2978
2979	if (found_single_character) {
2980	BlockLabel cont, again;
2981	masm->BindBlock(label: &again);
2982	masm->LoadCurrentCharacter(cp_offset: max_lookahead, on_end_of_input: &cont, check_bounds: true);
2983	if (max_char_ > kSize) {
2984	masm->CheckCharacterAfterAnd(c: single_character,
2985	and_with: RegExpMacroAssembler::kTableMask, on_equal: &cont);
2986	} else {
2987	masm->CheckCharacter(c: single_character, on_equal: &cont);
2988	}
2989	masm->AdvanceCurrentPosition(by: lookahead_width);
2990	masm->GoTo(to: &again);
2991	masm->BindBlock(label: &cont);
2992	return;
2993	}
2994
2995	const TypedData& boolean_skip_table = TypedData::ZoneHandle(
2996	zone: compiler_->zone(),
2997	ptr: TypedData::New(class_id: kTypedDataUint8ArrayCid, len: kSize, space: Heap::kOld));
2998	intptr_t skip_distance =
2999	GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
3000	ASSERT(skip_distance != `0`);
3001
3002	BlockLabel cont, again;
3003
3004	masm->BindBlock(label: &again);
3005	masm->CheckPreemption(/is_backtrack=/false);
3006	masm->LoadCurrentCharacter(cp_offset: max_lookahead, on_end_of_input: &cont, check_bounds: true);
3007	masm->CheckBitInTable(table: boolean_skip_table, on_bit_set: &cont);
3008	masm->AdvanceCurrentPosition(by: skip_distance);
3009	masm->GoTo(to: &again);
3010	masm->BindBlock(label: &cont);
3011
3012	return;
3013	}
3014
3015	/ Code generation for choice nodes.*
3016	*
3017	* We generate quick checks that do a mask and compare to eliminate a
3018	* choice. If the quick check succeeds then it jumps to the continuation to
3019	* do slow checks and check subsequent nodes. If it fails (the common case)
3020	* it falls through to the next choice.
3021	*
3022	* Here is the desired flow graph. Nodes directly below each other imply
3023	* fallthrough. Alternatives 1 and 2 have quick checks. Alternative
3024	* 3 doesn't have a quick check so we have to call the slow check.
3025	* Nodes are marked Qn for quick checks and Sn for slow checks. The entire
3026	* regexp continuation is generated directly after the Sn node, up to the
3027	* next GoTo if we decide to reuse some already generated code. Some
3028	* nodes expect preload_characters to be preloaded into the current
3029	* character register. R nodes do this preloading. Vertices are marked
3030	* F for failures and S for success (possible success in the case of quick
3031	* nodes). L, V, < and > are used as arrow heads.
3032	*
3033	* ----------> R
3034	* \|
3035	* V
3036	* Q1 -----> S1
3037	* \| S /
3038	* F\| /
3039	* \| F/
3040	* \| /
3041	* \| R
3042	* \| /
3043	* V L
3044	* Q2 -----> S2
3045	* \| S /
3046	* F\| /
3047	* \| F/
3048	* \| /
3049	* \| R
3050	* \| /
3051	* V L
3052	* S3
3053	* \|
3054	* F\|
3055	* \|
3056	* R
3057	* \|
3058	* backtrack V
3059	* <----------Q4
3060	* \ F \|
3061	* \ \|S
3062	* \ F V
3063	* \-----S4
3064	*
3065	* For greedy loops we push the current position, then generate the code that
3066	* eats the input specially in EmitGreedyLoop. The other choice (the
3067	* continuation) is generated by the normal code in EmitChoices, and steps back
3068	* in the input to the starting position when it fails to match. The loop code
3069	* looks like this (U is the unwind code that steps back in the greedy loop).
3070	*
3071	* _____
3072	* / \
3073	* V \|
3074	* ----------> S1 \|
3075	* /\| \|
3076	* / \|S \|
3077	* F/ \_____/
3078	* /
3079	* \|<-----
3080	* \| \
3081	* V \|S
3082	* Q2 ---> U----->backtrack
3083	* \| F /
3084	* S\| /
3085	* V F /
3086	* S2--/
3087	*/
3088
3089	GreedyLoopState::GreedyLoopState(bool not_at_start) {
3090	counter_backtrack_trace_.set_backtrack(&label_);
3091	if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3092	}
3093
3094	void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3095	#ifdef DEBUG
3096	intptr_t choice_count = alternatives_->length();
3097	for (intptr_t i = `0`; i < choice_count - `1`; i++) {
3098	GuardedAlternative alternative = alternatives_->At(i);
3099	ZoneGrowableArray<Guard> guards = alternative.guards();
3100	intptr_t guard_count = (guards == nullptr) ? `0` : guards->length();
3101	for (intptr_t j = `0`; j < guard_count; j++) {
3102	ASSERT(!trace->mentions_reg(guards->At(j)->reg()));
3103	}
3104	}
3105	#endif
3106	}
3107
3108	void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
3109	Trace* current_trace,
3110	PreloadState* state) {
3111	if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3112	// Save some time by looking at most one machine word ahead.
3113	state->eats_at_least_ =
3114	EatsAtLeast(still_to_find: compiler->one_byte() ? `4` : `2`, budget: kRecursionBudget,
3115	not_at_start: current_trace->at_start() == Trace::FALSE_VALUE);
3116	}
3117	state->preload_characters_ =
3118	CalculatePreloadCharacters(compiler, eats_at_least: state->eats_at_least_);
3119
3120	state->preload_is_current_ =
3121	(current_trace->characters_preloaded() == state->preload_characters_);
3122	state->preload_has_checked_bounds_ = state->preload_is_current_;
3123	}
3124
3125	void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3126	intptr_t choice_count = alternatives_->length();
3127
3128	if (choice_count == `1` && alternatives_->At(index: `0`).guards() == nullptr) {
3129	alternatives_->At(index: `0`).node()->Emit(compiler, trace);
3130	return;
3131	}
3132
3133	AssertGuardsMentionRegisters(trace);
3134
3135	LimitResult limit_result = LimitVersions(compiler, trace);
3136	if (limit_result == DONE) return;
3137	ASSERT(limit_result == CONTINUE);
3138
3139	// For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
3140	// other choice nodes we only flush if we are out of code size budget.
3141	if (trace->flush_budget() == `0` && trace->actions() != nullptr) {
3142	trace->Flush(compiler, successor: this);
3143	return;
3144	}
3145
3146	RecursionCheck rc(compiler);
3147
3148	PreloadState preload;
3149	preload.init();
3150	GreedyLoopState greedy_loop_state(not_at_start());
3151
3152	intptr_t text_length =
3153	GreedyLoopTextLengthForAlternative(alternative: &alternatives_->At(index: `0`));
3154	AlternativeGenerationList alt_gens(choice_count);
3155
3156	if (choice_count > `1` && text_length != kNodeIsTooComplexForGreedyLoops) {
3157	trace = EmitGreedyLoop(compiler, trace, alt_gens: &alt_gens, preloads: &preload,
3158	greedy_loop_state: &greedy_loop_state, text_length);
3159	} else {
3160	// TODO(erikcorry): Delete this. We don't need this label, but it makes us
3161	// match the traces produced pre-cleanup.
3162	BlockLabel second_choice;
3163	compiler->macro_assembler()->BindBlock(label: &second_choice);
3164
3165	preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3166
3167	EmitChoices(compiler, alt_gens: &alt_gens, first_choice: `0`, trace, preloads: &preload);
3168	}
3169
3170	// At this point we need to generate slow checks for the alternatives where
3171	// the quick check was inlined. We can recognize these because the associated
3172	// label was bound.
3173	intptr_t new_flush_budget = trace->flush_budget() / choice_count;
3174	for (intptr_t i = `0`; i < choice_count; i++) {
3175	AlternativeGeneration* alt_gen = alt_gens.at(i);
3176	Trace new_trace(*trace);
3177	// If there are actions to be flushed we have to limit how many times
3178	// they are flushed. Take the budget of the parent trace and distribute
3179	// it fairly amongst the children.
3180	if (new_trace.actions() != nullptr) {
3181	new_trace.set_flush_budget(new_flush_budget);
3182	}
3183	bool next_expects_preload =
3184	i == choice_count - `1` ? false : alt_gens.at(i: i + `1`)->expects_preload;
3185	EmitOutOfLineContinuation(compiler, trace: &new_trace, alternative: alternatives_->At(index: i),
3186	alt_gen, preload_characters: preload.preload_characters_,
3187	next_expects_preload);
3188	}
3189	}
3190
3191	Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
3192	Trace* trace,
3193	AlternativeGenerationList* alt_gens,
3194	PreloadState* preload,
3195	GreedyLoopState* greedy_loop_state,
3196	intptr_t text_length) {
3197	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3198	// Here we have special handling for greedy loops containing only text nodes
3199	// and other simple nodes. These are handled by pushing the current
3200	// position on the stack and then incrementing the current position each
3201	// time around the switch. On backtrack we decrement the current position
3202	// and check it against the pushed value. This avoids pushing backtrack
3203	// information for each iteration of the loop, which could take up a lot of
3204	// space.
3205	ASSERT(trace->stop_node() == nullptr);
3206	macro_assembler->PushCurrentPosition();
3207	BlockLabel greedy_match_failed;
3208	Trace greedy_match_trace;
3209	if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
3210	greedy_match_trace.set_backtrack(&greedy_match_failed);
3211	BlockLabel loop_label;
3212	macro_assembler->BindBlock(label: &loop_label);
3213	macro_assembler->CheckPreemption(/is_backtrack=/false);
3214	greedy_match_trace.set_stop_node(this);
3215	greedy_match_trace.set_loop_label(&loop_label);
3216	(*alternatives_)[`0`].node()->Emit(compiler, trace: &greedy_match_trace);
3217	macro_assembler->BindBlock(label: &greedy_match_failed);
3218
3219	BlockLabel second_choice; // For use in greedy matches.
3220	macro_assembler->BindBlock(label: &second_choice);
3221
3222	Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3223
3224	EmitChoices(compiler, alt_gens, first_choice: `1`, trace: new_trace, preloads: preload);
3225
3226	macro_assembler->BindBlock(label: greedy_loop_state->label());
3227	// If we have unwound to the bottom then backtrack.
3228	macro_assembler->CheckGreedyLoop(on_tos_equals_current_position: trace->backtrack());
3229	// Otherwise try the second priority at an earlier position.
3230	macro_assembler->AdvanceCurrentPosition(by: -text_length);
3231	macro_assembler->GoTo(to: &second_choice);
3232	return new_trace;
3233	}
3234
3235	intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3236	Trace* trace) {
3237	intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3238	if (alternatives_->length() != `2`) return eats_at_least;
3239
3240	GuardedAlternative alt1 = alternatives_->At(index: `1`);
3241	if (alt1.guards() != nullptr && alt1.guards()->length() != `0`) {
3242	return eats_at_least;
3243	}
3244	RegExpNode* eats_anything_node = alt1.node();
3245	if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
3246	return eats_at_least;
3247	}
3248
3249	// Really we should be creating a new trace when we execute this function,
3250	// but there is no need, because the code it generates cannot backtrack, and
3251	// we always arrive here with a trivial trace (since it's the entry to a
3252	// loop. That also implies that there are no preloaded characters, which is
3253	// good, because it means we won't be violating any assumptions by
3254	// overwriting those characters with new load instructions.
3255	ASSERT(trace->is_trivial());
3256
3257	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3258	// At this point we know that we are at a non-greedy loop that will eat
3259	// any character one at a time. Any non-anchored regexp has such a
3260	// loop prepended to it in order to find where it starts. We look for
3261	// a pattern of the form ...abc... where we can look 6 characters ahead
3262	// and step forwards 3 if the character is not one of abc. Abc need
3263	// not be atoms, they can be any reasonably limited character class or
3264	// small alternation.
3265	BoyerMooreLookahead* bm = bm_info(not_at_start: false);
3266	if (bm == nullptr) {
3267	eats_at_least = Utils::Minimum(
3268	x: kMaxLookaheadForBoyerMoore,
3269	y: EatsAtLeast(still_to_find: kMaxLookaheadForBoyerMoore, budget: kRecursionBudget, not_at_start: false));
3270	if (eats_at_least >= `1`) {
3271	bm = new (Z) BoyerMooreLookahead (eats_at_least, compiler, Z);
3272	GuardedAlternative alt0 = alternatives_->At(index: `0`);
3273	alt0.node()->FillInBMInfo(offset: `0`, budget: kRecursionBudget, bm, not_at_start: false);
3274	}
3275	}
3276	if (bm != nullptr) {
3277	bm->EmitSkipInstructions(masm: macro_assembler);
3278	}
3279	return eats_at_least;
3280	}
3281
3282	void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3283	AlternativeGenerationList* alt_gens,
3284	intptr_t first_choice,
3285	Trace* trace,
3286	PreloadState* preload) {
3287	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3288	SetUpPreLoad(compiler, current_trace: trace, state: preload);
3289
3290	// For now we just call all choices one after the other. The idea ultimately
3291	// is to use the Dispatch table to try only the relevant ones.
3292	intptr_t choice_count = alternatives_->length();
3293
3294	intptr_t new_flush_budget = trace->flush_budget() / choice_count;
3295
3296	for (intptr_t i = first_choice; i < choice_count; i++) {
3297	bool is_last = i == choice_count - `1`;
3298	bool fall_through_on_failure = !is_last;
3299	GuardedAlternative alternative = alternatives_->At(index: i);
3300	AlternativeGeneration* alt_gen = alt_gens->at(i);
3301	alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3302	ZoneGrowableArray<Guard> guards = alternative.guards();
3303	intptr_t guard_count = (guards == nullptr) ? `0` : guards->length();
3304	Trace new_trace(*trace);
3305	new_trace.set_characters_preloaded(
3306	preload->preload_is_current_ ? preload->preload_characters_ : `0`);
3307	if (preload->preload_has_checked_bounds_) {
3308	new_trace.set_bound_checked_up_to(preload->preload_characters_);
3309	}
3310	new_trace.quick_check_performed()->Clear();
3311	if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
3312	if (!is_last) {
3313	new_trace.set_backtrack(&alt_gen->after);
3314	}
3315	alt_gen->expects_preload = preload->preload_is_current_;
3316	bool generate_full_check_inline = false;
3317	if (kRegexpOptimization &&
3318	try_to_emit_quick_check_for_alternative(is_first: i == `0`) &&
3319	alternative.node()->EmitQuickCheck(
3320	compiler, bounds_check_trace: trace, trace: &new_trace, preload_has_checked_bounds: preload->preload_has_checked_bounds_,
3321	on_possible_success: &alt_gen->possible_success, details: &alt_gen->quick_check_details,
3322	fall_through_on_failure)) {
3323	// Quick check was generated for this choice.
3324	preload->preload_is_current_ = true;
3325	preload->preload_has_checked_bounds_ = true;
3326	// If we generated the quick check to fall through on possible success,
3327	// we now need to generate the full check inline.
3328	if (!fall_through_on_failure) {
3329	macro_assembler->BindBlock(label: &alt_gen->possible_success);
3330	new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3331	new_trace.set_characters_preloaded(preload->preload_characters_);
3332	new_trace.set_bound_checked_up_to(preload->preload_characters_);
3333	generate_full_check_inline = true;
3334	}
3335	} else if (alt_gen->quick_check_details.cannot_match()) {
3336	if (!fall_through_on_failure) {
3337	macro_assembler->GoTo(to: trace->backtrack());
3338	}
3339	continue;
3340	} else {
3341	// No quick check was generated. Put the full code here.
3342	// If this is not the first choice then there could be slow checks from
3343	// previous cases that go here when they fail. There's no reason to
3344	// insist that they preload characters since the slow check we are about
3345	// to generate probably can't use it.
3346	if (i != first_choice) {
3347	alt_gen->expects_preload = false;
3348	new_trace.InvalidateCurrentCharacter();
3349	}
3350	generate_full_check_inline = true;
3351	}
3352	if (generate_full_check_inline) {
3353	if (new_trace.actions() != nullptr) {
3354	new_trace.set_flush_budget(new_flush_budget);
3355	}
3356	for (intptr_t j = `0`; j < guard_count; j++) {
3357	GenerateGuard(macro_assembler, guard: guards->At(index: j), trace: &new_trace);
3358	}
3359	alternative.node()->Emit(compiler, trace: &new_trace);
3360	preload->preload_is_current_ = false;
3361	}
3362	macro_assembler->BindBlock(label: &alt_gen->after);
3363	}
3364	}
3365
3366	void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3367	Trace* trace,
3368	GuardedAlternative alternative,
3369	AlternativeGeneration* alt_gen,
3370	intptr_t preload_characters,
3371	bool next_expects_preload) {
3372	if (!alt_gen->possible_success.is_linked()) return;
3373
3374	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3375	macro_assembler->BindBlock(label: &alt_gen->possible_success);
3376	Trace out_of_line_trace(*trace);
3377	out_of_line_trace.set_characters_preloaded(preload_characters);
3378	out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3379	if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
3380	ZoneGrowableArray<Guard> guards = alternative.guards();
3381	intptr_t guard_count = (guards == nullptr) ? `0` : guards->length();
3382	if (next_expects_preload) {
3383	BlockLabel reload_current_char;
3384	out_of_line_trace.set_backtrack(&reload_current_char);
3385	for (intptr_t j = `0`; j < guard_count; j++) {
3386	GenerateGuard(macro_assembler, guard: guards->At(index: j), trace: &out_of_line_trace);
3387	}
3388	alternative.node()->Emit(compiler, trace: &out_of_line_trace);
3389	macro_assembler->BindBlock(label: &reload_current_char);
3390	// Reload the current character, since the next quick check expects that.
3391	// We don't need to check bounds here because we only get into this
3392	// code through a quick check which already did the checked load.
3393	macro_assembler->LoadCurrentCharacter(cp_offset: trace->cp_offset(), on_end_of_input: nullptr, check_bounds: false,
3394	characters: preload_characters);
3395	macro_assembler->GoTo(to: &(alt_gen->after));
3396	} else {
3397	out_of_line_trace.set_backtrack(&(alt_gen->after));
3398	for (intptr_t j = `0`; j < guard_count; j++) {
3399	GenerateGuard(macro_assembler, guard: guards->At(index: j), trace: &out_of_line_trace);
3400	}
3401	alternative.node()->Emit(compiler, trace: &out_of_line_trace);
3402	}
3403	}
3404
3405	void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3406	RegExpMacroAssembler* assembler = compiler->macro_assembler();
3407	LimitResult limit_result = LimitVersions(compiler, trace);
3408	if (limit_result == DONE) return;
3409	ASSERT(limit_result == CONTINUE);
3410
3411	RecursionCheck rc(compiler);
3412
3413	switch (action_type_) {
3414	case STORE_POSITION: {
3415	Trace::DeferredCapture new_capture(data_.u_position_register.reg,
3416	data_.u_position_register.is_capture,
3417	trace);
3418	Trace new_trace = *trace;
3419	new_trace.add_action(new_action: &new_capture);
3420	on_success()->Emit(compiler, trace: &new_trace);
3421	break;
3422	}
3423	case INCREMENT_REGISTER: {
3424	Trace::DeferredIncrementRegister new_increment(
3425	data_.u_increment_register.reg);
3426	Trace new_trace = *trace;
3427	new_trace.add_action(new_action: &new_increment);
3428	on_success()->Emit(compiler, trace: &new_trace);
3429	break;
3430	}
3431	case SET_REGISTER: {
3432	Trace::DeferredSetRegister new_set(data_.u_store_register.reg,
3433	data_.u_store_register.value);
3434	Trace new_trace = *trace;
3435	new_trace.add_action(new_action: &new_set);
3436	on_success()->Emit(compiler, trace: &new_trace);
3437	break;
3438	}
3439	case CLEAR_CAPTURES: {
3440	Trace::DeferredClearCaptures new_capture(Interval (
3441	data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
3442	Trace new_trace = *trace;
3443	new_trace.add_action(new_action: &new_capture);
3444	on_success()->Emit(compiler, trace: &new_trace);
3445	break;
3446	}
3447	case BEGIN_SUBMATCH:
3448	if (!trace->is_trivial()) {
3449	trace->Flush(compiler, successor: this);
3450	} else {
3451	assembler->WriteCurrentPositionToRegister(
3452	reg: data_.u_submatch.current_position_register, cp_offset: `0`);
3453	assembler->WriteStackPointerToRegister(
3454	reg: data_.u_submatch.stack_pointer_register);
3455	on_success()->Emit(compiler, trace);
3456	}
3457	break;
3458	case EMPTY_MATCH_CHECK: {
3459	intptr_t start_pos_reg = data_.u_empty_match_check.start_register;
3460	intptr_t stored_pos = `0`;
3461	intptr_t rep_reg = data_.u_empty_match_check.repetition_register;
3462	bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3463	bool know_dist = trace->GetStoredPosition(reg: start_pos_reg, cp_offset: &stored_pos);
3464	if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3465	// If we know we haven't advanced and there is no minimum we
3466	// can just backtrack immediately.
3467	assembler->GoTo(to: trace->backtrack());
3468	} else if (know_dist && stored_pos < trace->cp_offset()) {
3469	// If we know we've advanced we can generate the continuation
3470	// immediately.
3471	on_success()->Emit(compiler, trace);
3472	} else if (!trace->is_trivial()) {
3473	trace->Flush(compiler, successor: this);
3474	} else {
3475	BlockLabel skip_empty_check;
3476	// If we have a minimum number of repetitions we check the current
3477	// number first and skip the empty check if it's not enough.
3478	if (has_minimum) {
3479	intptr_t limit = data_.u_empty_match_check.repetition_limit;
3480	assembler->IfRegisterLT(reg: rep_reg, comparand: limit, if_lt: &skip_empty_check);
3481	}
3482	// If the match is empty we bail out, otherwise we fall through
3483	// to the on-success continuation.
3484	assembler->IfRegisterEqPos(reg: data_.u_empty_match_check.start_register,
3485	if_eq: trace->backtrack());
3486	assembler->BindBlock(label: &skip_empty_check);
3487	on_success()->Emit(compiler, trace);
3488	}
3489	break;
3490	}
3491	case POSITIVE_SUBMATCH_SUCCESS: {
3492	if (!trace->is_trivial()) {
3493	trace->Flush(compiler, successor: this);
3494	return;
3495	}
3496	assembler->ReadCurrentPositionFromRegister(
3497	reg: data_.u_submatch.current_position_register);
3498	assembler->ReadStackPointerFromRegister(
3499	reg: data_.u_submatch.stack_pointer_register);
3500	intptr_t clear_register_count = data_.u_submatch.clear_register_count;
3501	if (clear_register_count == `0`) {
3502	on_success()->Emit(compiler, trace);
3503	return;
3504	}
3505	intptr_t clear_registers_from = data_.u_submatch.clear_register_from;
3506	BlockLabel clear_registers_backtrack;
3507	Trace new_trace = *trace;
3508	new_trace.set_backtrack(&clear_registers_backtrack);
3509	on_success()->Emit(compiler, trace: &new_trace);
3510
3511	assembler->BindBlock(label: &clear_registers_backtrack);
3512	intptr_t clear_registers_to =
3513	clear_registers_from + clear_register_count - `1`;
3514	assembler->ClearRegisters(reg_from: clear_registers_from, reg_to: clear_registers_to);
3515
3516	ASSERT(trace->backtrack() == nullptr);
3517	assembler->Backtrack();
3518	return;
3519	}
3520	default:
3521	UNREACHABLE();
3522	}
3523	}
3524
3525	void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3526	RegExpMacroAssembler* assembler = compiler->macro_assembler();
3527	if (!trace->is_trivial()) {
3528	trace->Flush(compiler, successor: this);
3529	return;
3530	}
3531
3532	LimitResult limit_result = LimitVersions(compiler, trace);
3533	if (limit_result == DONE) return;
3534	ASSERT(limit_result == CONTINUE);
3535
3536	RecursionCheck rc(compiler);
3537
3538	ASSERT(start_reg_ + `1` == end_reg_);
3539	if (flags_.IgnoreCase()) {
3540	assembler->CheckNotBackReferenceIgnoreCase(
3541	start_reg: start_reg_, read_backward: read_backward(), unicode: flags_.IsUnicode(), on_no_match: trace->backtrack());
3542	} else {
3543	assembler->CheckNotBackReference(start_reg: start_reg_, read_backward: read_backward(),
3544	on_no_match: trace->backtrack());
3545	}
3546	// We are going to advance backward, so we may end up at the start.
3547	if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
3548
3549	// Check that the back reference does not end inside a surrogate pair.
3550	if (flags_.IsUnicode() && !compiler->one_byte()) {
3551	assembler->CheckNotInSurrogatePair(cp_offset: trace->cp_offset(), on_failure: trace->backtrack());
3552	}
3553
3554	on_success()->Emit(compiler, trace);
3555	}
3556
3557	// -------------------------------------------------------------------
3558	// Dot/dotty output
3559
3560	#ifdef DEBUG
3561
3562	class DotPrinter : public NodeVisitor {
3563	public:
3564	explicit DotPrinter(bool ignore_case) {}
3565	void PrintNode(const char* label, RegExpNode* node);
3566	void Visit(RegExpNode* node);
3567	void PrintAttributes(RegExpNode* from);
3568	void PrintOnFailure(RegExpNode* from, RegExpNode* to);
3569	#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
3570	FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3571	#undef DECLARE_VISIT
3572	};
3573
3574	void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
3575	OS::PrintErr("digraph G {\n graph [label=\"");
3576	for (intptr_t i = `0`; label[i] != `'\0'`; i++) {
3577	switch (label[i]) {
3578	case `'\\'`:
3579	OS::PrintErr("\\\\");
3580	break;
3581	case `'"'`:
3582	OS::PrintErr("\"");
3583	break;
3584	default:
3585	OS::PrintErr("%c", label[i]);
3586	break;
3587	}
3588	}
3589	OS::PrintErr("\"];\n");
3590	Visit(node);
3591	OS::PrintErr("}\n");
3592	}
3593
3594	void DotPrinter::Visit(RegExpNode* node) {
3595	if (node->info()->visited) return;
3596	node->info()->visited = true;
3597	node->Accept(this);
3598	}
3599
3600	void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
3601	OS::PrintErr(" n%p -> n%p [style=dotted];\n", from, on_failure);
3602	Visit(on_failure);
3603	}
3604
3605	class AttributePrinter : public ValueObject {
3606	public:
3607	AttributePrinter() : first_(true) {}
3608	void PrintSeparator() {
3609	if (first_) {
3610	first_ = false;
3611	} else {
3612	OS::PrintErr("\|");
3613	}
3614	}
3615	void PrintBit(const char* name, bool value) {
3616	if (!value) return;
3617	PrintSeparator();
3618	OS::PrintErr("{%s}", name);
3619	}
3620	void PrintPositive(const char* name, intptr_t value) {
3621	if (value < `0`) return;
3622	PrintSeparator();
3623	OS::PrintErr("{%s\|%" Pd "}", name, value);
3624	}
3625
3626	private:
3627	bool first_;
3628	};
3629
3630	void DotPrinter::PrintAttributes(RegExpNode* that) {
3631	OS::PrintErr(
3632	" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
3633	"margin=0.1, fontsize=10, label=\"{",
3634	that);
3635	AttributePrinter printer;
3636	NodeInfo* info = that->info();
3637	printer.PrintBit("NI", info->follows_newline_interest);
3638	printer.PrintBit("WI", info->follows_word_interest);
3639	printer.PrintBit("SI", info->follows_start_interest);
3640	BlockLabel* label = that->label();
3641	if (label->is_bound()) printer.PrintPositive("@", label->pos());
3642	OS::PrintErr(
3643	"}\"];\n"
3644	" a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",
3645	that, that);
3646	}
3647
3648	void DotPrinter::VisitChoice(ChoiceNode* that) {
3649	OS::PrintErr(" n%p [shape=Mrecord, label=\"?\"];\n", that);
3650	for (intptr_t i = `0`; i < that->alternatives()->length(); i++) {
3651	GuardedAlternative alt = that->alternatives()->At(i);
3652	OS::PrintErr(" n%p -> n%p", that, alt.node());
3653	}
3654	for (intptr_t i = `0`; i < that->alternatives()->length(); i++) {
3655	GuardedAlternative alt = that->alternatives()->At(i);
3656	alt.node()->Accept(this);
3657	}
3658	}
3659
3660	void DotPrinter::VisitText(TextNode* that) {
3661	OS::PrintErr(" n%p [label=\"", that);
3662	for (intptr_t i = `0`; i < that->elements()->length(); i++) {
3663	if (i > `0`) OS::PrintErr(" ");
3664	TextElement elm = that->elements()->At(i);
3665	switch (elm.text_type()) {
3666	case TextElement::ATOM: {
3667	ZoneGrowableArray<uint16_t>* data = elm.atom()->data();
3668	for (intptr_t i = `0`; i < data->length(); i++) {
3669	OS::PrintErr("%c", static_cast<char>(data->At(i)));
3670	}
3671	break;
3672	}
3673	case TextElement::CHAR_CLASS: {
3674	RegExpCharacterClass* node = elm.char_class();
3675	OS::PrintErr("[");
3676	if (node->is_negated()) OS::PrintErr("^");
3677	for (intptr_t j = `0`; j < node->ranges()->length(); j++) {
3678	CharacterRange range = node->ranges()->At(j);
3679	PrintUtf16(range.from());
3680	OS::PrintErr("-");
3681	PrintUtf16(range.to());
3682	}
3683	OS::PrintErr("]");
3684	break;
3685	}
3686	default:
3687	UNREACHABLE();
3688	}
3689	}
3690	OS::PrintErr("\", shape=box, peripheries=2];\n");
3691	PrintAttributes(that);
3692	OS::PrintErr(" n%p -> n%p;\n", that, that->on_success());
3693	Visit(that->on_success());
3694	}
3695
3696	void DotPrinter::VisitBackReference(BackReferenceNode* that) {
3697	OS::PrintErr(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n",
3698	that, that->start_register(), that->end_register());
3699	PrintAttributes(that);
3700	OS::PrintErr(" n%p -> n%p;\n", that, that->on_success());
3701	Visit(that->on_success());
3702	}
3703
3704	void DotPrinter::VisitEnd(EndNode* that) {
3705	OS::PrintErr(" n%p [style=bold, shape=point];\n", that);
3706	PrintAttributes(that);
3707	}
3708
3709	void DotPrinter::VisitAssertion(AssertionNode* that) {
3710	OS::PrintErr(" n%p [", that);
3711	switch (that->assertion_type()) {
3712	case AssertionNode::AT_END:
3713	OS::PrintErr("label=\"$\", shape=septagon");
3714	break;
3715	case AssertionNode::AT_START:
3716	OS::PrintErr("label=\"^\", shape=septagon");
3717	break;
3718	case AssertionNode::AT_BOUNDARY:
3719	OS::PrintErr("label=\"\\b\", shape=septagon");
3720	break;
3721	case AssertionNode::AT_NON_BOUNDARY:
3722	OS::PrintErr("label=\"\\B\", shape=septagon");
3723	break;
3724	case AssertionNode::AFTER_NEWLINE:
3725	OS::PrintErr("label=\"(?<=\\n)\", shape=septagon");
3726	break;
3727	}
3728	OS::PrintErr("];\n");
3729	PrintAttributes(that);
3730	RegExpNode* successor = that->on_success();
3731	OS::PrintErr(" n%p -> n%p;\n", that, successor);
3732	Visit(successor);
3733	}
3734
3735	void DotPrinter::VisitAction(ActionNode* that) {
3736	OS::PrintErr(" n%p [", that);
3737	switch (that->action_type_) {
3738	case ActionNode::SET_REGISTER:
3739	OS::PrintErr("label=\"$%" Pd ":=%" Pd "\", shape=octagon",
3740	that->data_.u_store_register.reg,
3741	that->data_.u_store_register.value);
3742	break;
3743	case ActionNode::INCREMENT_REGISTER:
3744	OS::PrintErr("label=\"$%" Pd "++\", shape=octagon",
3745	that->data_.u_increment_register.reg);
3746	break;
3747	case ActionNode::STORE_POSITION:
3748	OS::PrintErr("label=\"$%" Pd ":=$pos\", shape=octagon",
3749	that->data_.u_position_register.reg);
3750	break;
3751	case ActionNode::BEGIN_SUBMATCH:
3752	OS::PrintErr("label=\"$%" Pd ":=$pos,begin\", shape=septagon",
3753	that->data_.u_submatch.current_position_register);
3754	break;
3755	case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3756	OS::PrintErr("label=\"escape\", shape=septagon");
3757	break;
3758	case ActionNode::EMPTY_MATCH_CHECK:
3759	OS::PrintErr("label=\"$%" Pd "=$pos?,$%" Pd "<%" Pd "?\", shape=septagon",
3760	that->data_.u_empty_match_check.start_register,
3761	that->data_.u_empty_match_check.repetition_register,
3762	that->data_.u_empty_match_check.repetition_limit);
3763	break;
3764	case ActionNode::CLEAR_CAPTURES: {
3765	OS::PrintErr("label=\"clear $%" Pd " to $%" Pd "\", shape=septagon",
3766	that->data_.u_clear_captures.range_from,
3767	that->data_.u_clear_captures.range_to);
3768	break;
3769	}
3770	}
3771	OS::PrintErr("];\n");
3772	PrintAttributes(that);
3773	RegExpNode* successor = that->on_success();
3774	OS::PrintErr(" n%p -> n%p;\n", that, successor);
3775	Visit(successor);
3776	}
3777
3778	void RegExpEngine::DotPrint(const char* label,
3779	RegExpNode* node,
3780	bool ignore_case) {
3781	DotPrinter printer(ignore_case);
3782	printer.PrintNode(label, node);
3783	}
3784
3785	#endif // DEBUG
3786
3787	// -------------------------------------------------------------------
3788	// Tree to graph conversion
3789
3790	// The zone in which we allocate graph nodes.
3791	#define OZ (on_success->zone())
3792
3793	RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
3794	RegExpNode* on_success) {
3795	ZoneGrowableArray<TextElement>* elms =
3796	new (OZ) ZoneGrowableArray<TextElement>(`1`);
3797	elms->Add(value: TextElement::Atom(atom: this));
3798	return new (OZ) TextNode (elms, compiler->read_backward(), on_success);
3799	}
3800
3801	RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
3802	RegExpNode* on_success) {
3803	ZoneGrowableArray<TextElement>* elms =
3804	new (OZ) ZoneGrowableArray<TextElement>(`1`);
3805	for (intptr_t i = `0`; i < elements()->length(); i++) {
3806	elms->Add(value: elements()->At(index: i));
3807	}
3808	return new (OZ) TextNode (elms, compiler->read_backward(), on_success);
3809	}
3810
3811	static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges,
3812	const int32_t* special_class,
3813	intptr_t length) {
3814	length--; // Remove final kRangeEndMarker.
3815	ASSERT(special_class[length] == kRangeEndMarker);
3816	ASSERT(ranges->length() != `0`);
3817	ASSERT(length != `0`);
3818	ASSERT(special_class[`0`] != `0`);
3819	if (ranges->length() != (length >> `1`) + `1`) {
3820	return false;
3821	}
3822	CharacterRange range = ranges->At(index: `0`);
3823	if (range.from() != `0`) {
3824	return false;
3825	}
3826	for (intptr_t i = `0`; i < length; i += `2`) {
3827	if (special_class[i] != (range.to() + `1`)) {
3828	return false;
3829	}
3830	range = ranges->At(index: (i >> `1`) + `1`);
3831	if (special_class[i + `1`] != range.from()) {
3832	return false;
3833	}
3834	}
3835	if (range.to() != Utf::kMaxCodePoint) {
3836	return false;
3837	}
3838	return true;
3839	}
3840
3841	static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges,
3842	const int32_t* special_class,
3843	intptr_t length) {
3844	length--; // Remove final kRangeEndMarker.
3845	ASSERT(special_class[length] == kRangeEndMarker);
3846	if (ranges->length() * `2` != length) {
3847	return false;
3848	}
3849	for (intptr_t i = `0`; i < length; i += `2`) {
3850	CharacterRange range = ranges->At(index: i >> `1`);
3851	if (range.from() != special_class[i] \|\|
3852	range.to() != special_class[i + `1`] - `1`) {
3853	return false;
3854	}
3855	}
3856	return true;
3857	}
3858
3859	bool RegExpCharacterClass::is_standard() {
3860	// TODO(lrn): Remove need for this function, by not throwing away information
3861	// along the way.
3862	if (is_negated()) {
3863	return false;
3864	}
3865	if (set_.is_standard()) {
3866	return true;
3867	}
3868	if (CompareRanges(ranges: set_.ranges(), special_class: kSpaceRanges, length: kSpaceRangeCount)) {
3869	set_.set_standard_set_type(`'s'`);
3870	return true;
3871	}
3872	if (CompareInverseRanges(ranges: set_.ranges(), special_class: kSpaceRanges, length: kSpaceRangeCount)) {
3873	set_.set_standard_set_type(`'S'`);
3874	return true;
3875	}
3876	if (CompareInverseRanges(ranges: set_.ranges(), special_class: kLineTerminatorRanges,
3877	length: kLineTerminatorRangeCount)) {
3878	set_.set_standard_set_type(`'.'`);
3879	return true;
3880	}
3881	if (CompareRanges(ranges: set_.ranges(), special_class: kLineTerminatorRanges,
3882	length: kLineTerminatorRangeCount)) {
3883	set_.set_standard_set_type(`'n'`);
3884	return true;
3885	}
3886	if (CompareRanges(ranges: set_.ranges(), special_class: kWordRanges, length: kWordRangeCount)) {
3887	set_.set_standard_set_type(`'w'`);
3888	return true;
3889	}
3890	if (CompareInverseRanges(ranges: set_.ranges(), special_class: kWordRanges, length: kWordRangeCount)) {
3891	set_.set_standard_set_type(`'W'`);
3892	return true;
3893	}
3894	return false;
3895	}
3896
3897	UnicodeRangeSplitter::UnicodeRangeSplitter(
3898	Zone* zone,
3899	ZoneGrowableArray<CharacterRange>* base)
3900	: zone_(zone),
3901	table_(zone),
3902	bmp_(nullptr),
3903	lead_surrogates_(nullptr),
3904	trail_surrogates_(nullptr),
3905	non_bmp_(nullptr) {
3906	// The unicode range splitter categorizes given character ranges into:
3907	// - Code points from the BMP representable by one code unit.
3908	// - Code points outside the BMP that need to be split into surrogate pairs.
3909	// - Lone lead surrogates.
3910	// - Lone trail surrogates.
3911	// Lone surrogates are valid code points, even though no actual characters.
3912	// They require special matching to make sure we do not split surrogate pairs.
3913	// We use the dispatch table to accomplish this. The base range is split up
3914	// by the table by the overlay ranges, and the Call callback is used to
3915	// filter and collect ranges for each category.
3916	for (intptr_t i = `0`; i < base->length(); i++) {
3917	table_.AddRange(range: base->At(index: i), value: kBase, zone: zone_);
3918	}
3919	// Add overlay ranges.
3920	table_.AddRange(range: CharacterRange::Range(from: `0`, to: Utf16::kLeadSurrogateStart - `1`),
3921	value: kBmpCodePoints, zone: zone_);
3922	table_.AddRange(range: CharacterRange::Range(from: Utf16::kLeadSurrogateStart,
3923	to: Utf16::kLeadSurrogateEnd),
3924	value: kLeadSurrogates, zone: zone_);
3925	table_.AddRange(range: CharacterRange::Range(from: Utf16::kTrailSurrogateStart,
3926	to: Utf16::kTrailSurrogateEnd),
3927	value: kTrailSurrogates, zone: zone_);
3928	table_.AddRange(
3929	range: CharacterRange::Range(from: Utf16::kTrailSurrogateEnd + `1`, to: Utf16::kMaxCodeUnit),
3930	value: kBmpCodePoints, zone: zone_);
3931	table_.AddRange(
3932	range: CharacterRange::Range(from: Utf16::kMaxCodeUnit + `1`, to: Utf::kMaxCodePoint),
3933	value: kNonBmpCodePoints, zone: zone_);
3934	table_.ForEach(callback: this);
3935	}
3936
3937	void UnicodeRangeSplitter::Call(uint32_t from, ChoiceTable::Entry entry) {
3938	OutSet* outset = entry.out_set();
3939	if (!outset->Get(value: kBase)) return;
3940	ZoneGrowableArray<CharacterRange> target = nullptr**;
3941	if (outset->Get(value: kBmpCodePoints)) {
3942	target = &bmp_;
3943	} else if (outset->Get(value: kLeadSurrogates)) {
3944	target = &lead_surrogates_;
3945	} else if (outset->Get(value: kTrailSurrogates)) {
3946	target = &trail_surrogates_;
3947	} else {
3948	ASSERT(outset->Get(kNonBmpCodePoints));
3949	target = &non_bmp_;
3950	}
3951	if (target == nullptr*) {
3952	target = new* (zone_) ZoneGrowableArray<CharacterRange>(`2`);
3953	}
3954	(*target)->Add(value: CharacterRange::Range(from: entry.from(), to: entry.to()));
3955	}
3956
3957	void AddBmpCharacters(RegExpCompiler* compiler,
3958	ChoiceNode* result,
3959	RegExpNode* on_success,
3960	UnicodeRangeSplitter* splitter) {
3961	ZoneGrowableArray<CharacterRange>* bmp = splitter->bmp();
3962	if (bmp == nullptr) return;
3963	result->AddAlternative(node: GuardedAlternative (TextNode::CreateForCharacterRanges(
3964	ranges: bmp, read_backward: compiler->read_backward(), on_success, flags: RegExpFlags ())));
3965	}
3966
3967	void AddNonBmpSurrogatePairs(RegExpCompiler* compiler,
3968	ChoiceNode* result,
3969	RegExpNode* on_success,
3970	UnicodeRangeSplitter* splitter) {
3971	ZoneGrowableArray<CharacterRange>* non_bmp = splitter->non_bmp();
3972	if (non_bmp == nullptr) return;
3973	ASSERT(!compiler->one_byte());
3974	CharacterRange::Canonicalize(ranges: non_bmp);
3975	for (int i = `0`; i < non_bmp->length(); i++) {
3976	// Match surrogate pair.
3977	// E.g. [\u10005-\u11005] becomes
3978	// \ud800[\udc05-\udfff]\|
3979	// [\ud801-\ud803][\udc00-\udfff]\|
3980	// \ud804[\udc00-\udc05]
3981	uint32_t from = non_bmp->At(index: i).from();
3982	uint32_t to = non_bmp->At(index: i).to();
3983	uint16_t from_points[`2`];
3984	Utf16::Encode(codepoint: from, dst: from_points);
3985	uint16_t to_points[`2`];
3986	Utf16::Encode(codepoint: to, dst: to_points);
3987	if (from_points[`0`] == to_points[`0`]) {
3988	// The lead surrogate is the same.
3989	result->AddAlternative(
3990	node: GuardedAlternative (TextNode::CreateForSurrogatePair(
3991	lead: CharacterRange::Singleton(value: from_points[`0`]),
3992	trail: CharacterRange::Range(from: from_points[`1`], to: to_points[`1`]),
3993	read_backward: compiler->read_backward(), on_success, flags: RegExpFlags ())));
3994	} else {
3995	if (from_points[`1`] != Utf16::kTrailSurrogateStart) {
3996	// Add [from_l][from_t-\udfff]
3997	result->AddAlternative(
3998	node: GuardedAlternative (TextNode::CreateForSurrogatePair(
3999	lead: CharacterRange::Singleton(value: from_points[`0`]),
4000	trail: CharacterRange::Range(from: from_points[`1`],
4001	to: Utf16::kTrailSurrogateEnd),
4002	read_backward: compiler->read_backward(), on_success, flags: RegExpFlags ())));
4003	from_points[`0`]++;
4004	}
4005	if (to_points[`1`] != Utf16::kTrailSurrogateEnd) {
4006	// Add [to_l][\udc00-to_t]
4007	result->AddAlternative(
4008	node: GuardedAlternative (TextNode::CreateForSurrogatePair(
4009	lead: CharacterRange::Singleton(value: to_points[`0`]),
4010	trail: CharacterRange::Range(from: Utf16::kTrailSurrogateStart,
4011	to: to_points[`1`]),
4012	read_backward: compiler->read_backward(), on_success, flags: RegExpFlags ())));
4013	to_points[`0`]--;
4014	}
4015	if (from_points[`0`] <= to_points[`0`]) {
4016	// Add [from_l-to_l][\udc00-\udfff]
4017	result->AddAlternative(
4018	node: GuardedAlternative (TextNode::CreateForSurrogatePair(
4019	lead: CharacterRange::Range(from: from_points[`0`], to: to_points[`0`]),
4020	trail: CharacterRange::Range(from: Utf16::kTrailSurrogateStart,
4021	to: Utf16::kTrailSurrogateEnd),
4022	read_backward: compiler->read_backward(), on_success, flags: RegExpFlags ())));
4023	}
4024	}
4025	}
4026	}
4027
4028	RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
4029	RegExpCompiler* compiler,
4030	ZoneGrowableArray<CharacterRange>* lookbehind,
4031	ZoneGrowableArray<CharacterRange>* match,
4032	RegExpNode* on_success,
4033	bool read_backward,
4034	RegExpFlags flags) {
4035	RegExpNode* match_node = TextNode::CreateForCharacterRanges(
4036	ranges: match, read_backward, on_success, flags);
4037	int stack_register = compiler->UnicodeLookaroundStackRegister();
4038	int position_register = compiler->UnicodeLookaroundPositionRegister();
4039	RegExpLookaround::Builder lookaround(false, match_node, stack_register,
4040	position_register);
4041	RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
4042	ranges: lookbehind, read_backward: !read_backward, on_success: lookaround.on_match_success(), flags);
4043	return lookaround.ForMatch(match: negative_match);
4044	}
4045
4046	RegExpNode* MatchAndNegativeLookaroundInReadDirection(
4047	RegExpCompiler* compiler,
4048	ZoneGrowableArray<CharacterRange>* match,
4049	ZoneGrowableArray<CharacterRange>* lookahead,
4050	RegExpNode* on_success,
4051	bool read_backward,
4052	RegExpFlags flags) {
4053	int stack_register = compiler->UnicodeLookaroundStackRegister();
4054	int position_register = compiler->UnicodeLookaroundPositionRegister();
4055	RegExpLookaround::Builder lookaround(false, on_success, stack_register,
4056	position_register);
4057	RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
4058	ranges: lookahead, read_backward, on_success: lookaround.on_match_success(), flags);
4059	return TextNode::CreateForCharacterRanges(
4060	ranges: match, read_backward, on_success: lookaround.ForMatch(match: negative_match), flags);
4061	}
4062
4063	void AddLoneLeadSurrogates(RegExpCompiler* compiler,
4064	ChoiceNode* result,
4065	RegExpNode* on_success,
4066	UnicodeRangeSplitter* splitter) {
4067	auto lead_surrogates = splitter->lead_surrogates();
4068	if (lead_surrogates == nullptr) return;
4069	// E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
4070	auto trail_surrogates = CharacterRange::List(
4071	zone: on_success->zone(), range: CharacterRange::Range(from: Utf16::kTrailSurrogateStart,
4072	to: Utf16::kTrailSurrogateEnd));
4073
4074	RegExpNode* match;
4075	if (compiler->read_backward()) {
4076	// Reading backward. Assert that reading forward, there is no trail
4077	// surrogate, and then backward match the lead surrogate.
4078	match = NegativeLookaroundAgainstReadDirectionAndMatch(
4079	compiler, lookbehind: trail_surrogates, match: lead_surrogates, on_success, read_backward: true,
4080	flags: RegExpFlags ());
4081	} else {
4082	// Reading forward. Forward match the lead surrogate and assert that
4083	// no trail surrogate follows.
4084	match = MatchAndNegativeLookaroundInReadDirection(
4085	compiler, match: lead_surrogates, lookahead: trail_surrogates, on_success, read_backward: false,
4086	flags: RegExpFlags ());
4087	}
4088	result->AddAlternative(node: GuardedAlternative (match));
4089	}
4090
4091	void AddLoneTrailSurrogates(RegExpCompiler* compiler,
4092	ChoiceNode* result,
4093	RegExpNode* on_success,
4094	UnicodeRangeSplitter* splitter) {
4095	auto trail_surrogates = splitter->trail_surrogates();
4096	if (trail_surrogates == nullptr) return;
4097	// E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
4098	auto lead_surrogates = CharacterRange::List(
4099	zone: on_success->zone(), range: CharacterRange::Range(from: Utf16::kLeadSurrogateStart,
4100	to: Utf16::kLeadSurrogateEnd));
4101
4102	RegExpNode* match;
4103	if (compiler->read_backward()) {
4104	// Reading backward. Backward match the trail surrogate and assert that no
4105	// lead surrogate precedes it.
4106	match = MatchAndNegativeLookaroundInReadDirection(
4107	compiler, match: trail_surrogates, lookahead: lead_surrogates, on_success, read_backward: true,
4108	flags: RegExpFlags ());
4109	} else {
4110	// Reading forward. Assert that reading backward, there is no lead
4111	// surrogate, and then forward match the trail surrogate.
4112	match = NegativeLookaroundAgainstReadDirectionAndMatch(
4113	compiler, lookbehind: lead_surrogates, match: trail_surrogates, on_success, read_backward: false,
4114	flags: RegExpFlags ());
4115	}
4116	result->AddAlternative(node: GuardedAlternative (match));
4117	}
4118
4119	RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
4120	RegExpNode* on_success) {
4121	// This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
4122	ASSERT(!compiler->read_backward());
4123	// Advance any character. If the character happens to be a lead surrogate and
4124	// we advanced into the middle of a surrogate pair, it will work out, as
4125	// nothing will match from there. We will have to advance again, consuming
4126	// the associated trail surrogate.
4127	auto range = CharacterRange::List(
4128	zone: on_success->zone(), range: CharacterRange::Range(from: `0`, to: Utf16::kMaxCodeUnit));
4129	return TextNode::CreateForCharacterRanges(ranges: range, read_backward: false, on_success,
4130	flags: RegExpFlags ());
4131	}
4132
4133	void AddUnicodeCaseEquivalents(ZoneGrowableArray<CharacterRange>* ranges) {
4134	ASSERT(CharacterRange::IsCanonical(ranges));
4135
4136	// Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
4137	// See also https://crbug.com/v8/6727.
4138	// TODO(sstrickl): This only covers the special case of the {0,0x10FFFF}
4139	// range, which we use frequently internally. But large ranges can also easily
4140	// be created by the user. We might want to have a more general caching
4141	// mechanism for such ranges.
4142	if (ranges->length() == `1` && ranges->At(index: `0`).IsEverything(max: Utf::kMaxCodePoint)) {
4143	return;
4144	}
4145
4146	icu::UnicodeSet set;
4147	for (int i = `0`; i < ranges->length(); i++) {
4148	set.add(start: ranges->At(index: i).from(), end: ranges->At(index: i).to());
4149	}
4150	ranges->Clear();
4151	set.closeOver(attribute: USET_CASE_INSENSITIVE);
4152	// Full case mapping map single characters to multiple characters.
4153	// Those are represented as strings in the set. Remove them so that
4154	// we end up with only simple and common case mappings.
4155	set.removeAllStrings();
4156	for (int i = `0`; i < set.getRangeCount(); i++) {
4157	ranges->Add(
4158	value: CharacterRange::Range(from: set.getRangeStart(index: i), to: set.getRangeEnd(index: i)));
4159	}
4160	// No errors and everything we collected have been ranges.
4161	CharacterRange::Canonicalize(ranges);
4162	}
4163
4164	RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4165	RegExpNode* on_success) {
4166	set_.Canonicalize();
4167	ZoneGrowableArray<CharacterRange>* ranges = this->ranges();
4168	if (flags_.NeedsUnicodeCaseEquivalents()) {
4169	AddUnicodeCaseEquivalents(ranges);
4170	}
4171	if (flags_.IsUnicode() && !compiler->one_byte() &&
4172	!contains_split_surrogate()) {
4173	if (is_negated()) {
4174	ZoneGrowableArray<CharacterRange>* negated =
4175	new ZoneGrowableArray<CharacterRange>(`2`);
4176	CharacterRange::Negate(src: ranges, dst: negated);
4177	ranges = negated;
4178	}
4179	if (ranges->length() == `0`) {
4180	RegExpCharacterClass* fail =
4181	new RegExpCharacterClass (ranges, RegExpFlags ());
4182	return new TextNode (fail, compiler->read_backward(), on_success);
4183	}
4184	if (standard_type() == `'*'`) {
4185	return UnanchoredAdvance(compiler, on_success);
4186	} else {
4187	ChoiceNode* result = new (OZ) ChoiceNode (`2`, OZ);
4188	UnicodeRangeSplitter splitter(OZ, ranges);
4189	AddBmpCharacters(compiler, result, on_success, splitter: &splitter);
4190	AddNonBmpSurrogatePairs(compiler, result, on_success, splitter: &splitter);
4191	AddLoneLeadSurrogates(compiler, result, on_success, splitter: &splitter);
4192	AddLoneTrailSurrogates(compiler, result, on_success, splitter: &splitter);
4193	return result;
4194	}
4195	} else {
4196	return new TextNode (this, compiler->read_backward(), on_success);
4197	}
4198	return new (OZ) TextNode (this, compiler->read_backward(), on_success);
4199	}
4200
4201	RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
4202	RegExpNode* on_success) {
4203	ZoneGrowableArray<RegExpTree> alternatives = this->alternatives();
4204	intptr_t length = alternatives->length();
4205	ChoiceNode* result = new (OZ) ChoiceNode (length, OZ);
4206	for (intptr_t i = `0`; i < length; i++) {
4207	GuardedAlternative alternative(
4208	alternatives->At(index: i)->ToNode(compiler, on_success));
4209	result->AddAlternative(node: alternative);
4210	}
4211	return result;
4212	}
4213
4214	RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
4215	RegExpNode* on_success) {
4216	return ToNode(min: min(), max: max(), is_greedy: is_greedy(), body: body(), compiler, on_success);
4217	}
4218
4219	// Scoped object to keep track of how much we unroll quantifier loops in the
4220	// regexp graph generator.
4221	class RegExpExpansionLimiter : public ValueObject {
4222	public:
4223	static constexpr intptr_t kMaxExpansionFactor = `6`;
4224	RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)
4225	: compiler_(compiler),
4226	saved_expansion_factor_(compiler->current_expansion_factor()),
4227	ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
4228	ASSERT(factor > `0`);
4229	if (ok_to_expand_) {
4230	if (factor > kMaxExpansionFactor) {
4231	// Avoid integer overflow of the current expansion factor.
4232	ok_to_expand_ = false;
4233	compiler->set_current_expansion_factor(kMaxExpansionFactor + `1`);
4234	} else {
4235	intptr_t new_factor = saved_expansion_factor_ * factor;
4236	ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
4237	compiler->set_current_expansion_factor(new_factor);
4238	}
4239	}
4240	}
4241
4242	~RegExpExpansionLimiter() {
4243	compiler_->set_current_expansion_factor(saved_expansion_factor_);
4244	}
4245
4246	bool ok_to_expand() { return ok_to_expand_; }
4247
4248	private:
4249	RegExpCompiler* compiler_;
4250	intptr_t saved_expansion_factor_;
4251	bool ok_to_expand_;
4252
4253	DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
4254	};
4255
4256	RegExpNode* RegExpQuantifier::ToNode(intptr_t min,
4257	intptr_t max,
4258	bool is_greedy,
4259	RegExpTree* body,
4260	RegExpCompiler* compiler,
4261	RegExpNode* on_success,
4262	bool not_at_start) {
4263	// x{f, t} becomes this:
4264	//
4265	// (r++)<-.
4266	// \| `
4267	// \| (x)
4268	// v ^
4269	// (r=0)-->(?)---/ [if r < t]
4270	// \|
4271	// [if r >= f] \----> ...
4272	//
4273
4274	// 15.10.2.5 RepeatMatcher algorithm.
4275	// The parser has already eliminated the case where max is 0. In the case
4276	// where max_match is zero the parser has removed the quantifier if min was
4277	// > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
4278
4279	// If we know that we cannot match zero length then things are a little
4280	// simpler since we don't need to make the special zero length match check
4281	// from step 2.1. If the min and max are small we can unroll a little in
4282	// this case.
4283	// Unroll (foo)+ and (foo){3,}
4284	const intptr_t kMaxUnrolledMinMatches = `3`;
4285	// Unroll (foo)? and (foo){x,3}
4286	const intptr_t kMaxUnrolledMaxMatches = `3`;
4287	if (max == `0`) return on_success; // This can happen due to recursion.
4288	bool body_can_be_empty = (body->min_match() == `0`);
4289	intptr_t body_start_reg = RegExpCompiler::kNoRegister;
4290	Interval capture_registers = body->CaptureRegisters();
4291	bool needs_capture_clearing = !capture_registers.is_empty();
4292	Zone* zone = compiler->zone();
4293
4294	if (body_can_be_empty) {
4295	body_start_reg = compiler->AllocateRegister();
4296	} else if (kRegexpOptimization && !needs_capture_clearing) {
4297	// Only unroll if there are no captures and the body can't be
4298	// empty.
4299	{
4300	RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? `1` : `0`));
4301	if (min > `0` && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
4302	intptr_t new_max = (max == kInfinity) ? max : max - min;
4303	// Recurse once to get the loop or optional matches after the fixed
4304	// ones.
4305	RegExpNode* answer =
4306	ToNode(min: `0`, max: new_max, is_greedy, body, compiler, on_success, not_at_start: true);
4307	// Unroll the forced matches from 0 to min. This can cause chains of
4308	// TextNodes (which the parser does not generate). These should be
4309	// combined if it turns out they hinder good code generation.
4310	for (intptr_t i = `0`; i < min; i++) {
4311	answer = body->ToNode(compiler, on_success: answer);
4312	}
4313	return answer;
4314	}
4315	}
4316	if (max <= kMaxUnrolledMaxMatches && min == `0`) {
4317	ASSERT(max > `0`); // Due to the 'if' above.
4318	RegExpExpansionLimiter limiter(compiler, max);
4319	if (limiter.ok_to_expand()) {
4320	// Unroll the optional matches up to max.
4321	RegExpNode* answer = on_success;
4322	for (intptr_t i = `0`; i < max; i++) {
4323	ChoiceNode* alternation = new (zone) ChoiceNode (`2`, zone);
4324	if (is_greedy) {
4325	alternation->AddAlternative(
4326	node: GuardedAlternative (body->ToNode(compiler, on_success: answer)));
4327	alternation->AddAlternative(node: GuardedAlternative (on_success));
4328	} else {
4329	alternation->AddAlternative(node: GuardedAlternative (on_success));
4330	alternation->AddAlternative(
4331	node: GuardedAlternative (body->ToNode(compiler, on_success: answer)));
4332	}
4333	answer = alternation;
4334	if (not_at_start && !compiler->read_backward()) {
4335	alternation->set_not_at_start();
4336	}
4337	}
4338	return answer;
4339	}
4340	}
4341	}
4342	bool has_min = min > `0`;
4343	bool has_max = max < RegExpTree::kInfinity;
4344	bool needs_counter = has_min \|\| has_max;
4345	intptr_t reg_ctr = needs_counter ? compiler->AllocateRegister()
4346	: RegExpCompiler::kNoRegister;
4347	LoopChoiceNode* center = new (zone)
4348	LoopChoiceNode (body->min_match() == `0`, compiler->read_backward(), zone);
4349	if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
4350	RegExpNode* loop_return =
4351	needs_counter ? static_cast<RegExpNode*>(
4352	ActionNode::IncrementRegister(reg: reg_ctr, on_success: center))
4353	: static_cast<RegExpNode*>(center);
4354	if (body_can_be_empty) {
4355	// If the body can be empty we need to check if it was and then
4356	// backtrack.
4357	loop_return =
4358	ActionNode::EmptyMatchCheck(start_register: body_start_reg, repetition_register: reg_ctr, repetition_limit: min, on_success: loop_return);
4359	}
4360	RegExpNode* body_node = body->ToNode(compiler, on_success: loop_return);
4361	if (body_can_be_empty) {
4362	// If the body can be empty we need to store the start position
4363	// so we can bail out if it was empty.
4364	body_node = ActionNode::StorePosition(reg: body_start_reg, is_capture: false, on_success: body_node);
4365	}
4366	if (needs_capture_clearing) {
4367	// Before entering the body of this loop we need to clear captures.
4368	body_node = ActionNode::ClearCaptures(range: capture_registers, on_success: body_node);
4369	}
4370	GuardedAlternative body_alt(body_node);
4371	if (has_max) {
4372	Guard* body_guard = new (zone) Guard (reg_ctr, Guard::LT, max);
4373	body_alt.AddGuard(guard: body_guard, zone);
4374	}
4375	GuardedAlternative rest_alt(on_success);
4376	if (has_min) {
4377	Guard* rest_guard = new (zone) Guard (reg_ctr, Guard::GEQ, min);
4378	rest_alt.AddGuard(guard: rest_guard, zone);
4379	}
4380	if (is_greedy) {
4381	center->AddLoopAlternative(alt: body_alt);
4382	center->AddContinueAlternative(alt: rest_alt);
4383	} else {
4384	center->AddContinueAlternative(alt: rest_alt);
4385	center->AddLoopAlternative(alt: body_alt);
4386	}
4387	if (needs_counter) {
4388	return ActionNode::SetRegister(reg: reg_ctr, val: `0`, on_success: center);
4389	} else {
4390	return center;
4391	}
4392	}
4393
4394	namespace {
4395	// Desugar \b to (?<=\w)(?=\W)\|(?<=\W)(?=\w) and
4396	// \B to (?<=\w)(?=\w)\|(?<=\W)(?=\W)
4397	RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
4398	RegExpNode* on_success,
4399	RegExpAssertion::AssertionType type,
4400	RegExpFlags flags) {
4401	ASSERT(flags.NeedsUnicodeCaseEquivalents());
4402	ZoneGrowableArray<CharacterRange>* word_range =
4403	new ZoneGrowableArray<CharacterRange>(`2`);
4404	CharacterRange::AddClassEscape(type: `'w'`, ranges: word_range, add_unicode_case_equivalents: true);
4405	int stack_register = compiler->UnicodeLookaroundStackRegister();
4406	int position_register = compiler->UnicodeLookaroundPositionRegister();
4407	ChoiceNode* result = new (OZ) ChoiceNode (`2`, OZ);
4408	// Add two choices. The (non-)boundary could start with a word or
4409	// a non-word-character.
4410	for (int i = `0`; i < `2`; i++) {
4411	bool lookbehind_for_word = i == `0`;
4412	bool lookahead_for_word =
4413	(type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;
4414	// Look to the left.
4415	RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
4416	stack_register, position_register);
4417	RegExpNode* backward = TextNode::CreateForCharacterRanges(
4418	ranges: word_range, read_backward: true, on_success: lookbehind.on_match_success(), flags);
4419	// Look to the right.
4420	RegExpLookaround::Builder lookahead(lookahead_for_word,
4421	lookbehind.ForMatch(match: backward),
4422	stack_register, position_register);
4423	RegExpNode* forward = TextNode::CreateForCharacterRanges(
4424	ranges: word_range, read_backward: false, on_success: lookahead.on_match_success(), flags);
4425	result->AddAlternative(node: GuardedAlternative (lookahead.ForMatch(match: forward)));
4426	}
4427	return result;
4428	}
4429	} // anonymous namespace
4430
4431	RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
4432	RegExpNode* on_success) {
4433	switch (assertion_type()) {
4434	case START_OF_LINE:
4435	return AssertionNode::AfterNewline(on_success);
4436	case START_OF_INPUT:
4437	return AssertionNode::AtStart(on_success);
4438	case BOUNDARY:
4439	return flags_.NeedsUnicodeCaseEquivalents()
4440	? BoundaryAssertionAsLookaround(compiler, on_success, type: BOUNDARY,
4441	flags: flags_)
4442	: AssertionNode::AtBoundary(on_success);
4443	case NON_BOUNDARY:
4444	return flags_.NeedsUnicodeCaseEquivalents()
4445	? BoundaryAssertionAsLookaround(compiler, on_success,
4446	type: NON_BOUNDARY, flags: flags_)
4447	: AssertionNode::AtNonBoundary(on_success);
4448	case END_OF_INPUT:
4449	return AssertionNode::AtEnd(on_success);
4450	case END_OF_LINE: {
4451	// Compile $ in multiline regexps as an alternation with a positive
4452	// lookahead in one side and an end-of-input on the other side.
4453	// We need two registers for the lookahead.
4454	intptr_t stack_pointer_register = compiler->AllocateRegister();
4455	intptr_t position_register = compiler->AllocateRegister();
4456	// The ChoiceNode to distinguish between a newline and end-of-input.
4457	ChoiceNode* result = new ChoiceNode (`2`, on_success->zone());
4458	// Create a newline atom.
4459	ZoneGrowableArray<CharacterRange>* newline_ranges =
4460	new ZoneGrowableArray<CharacterRange>(`3`);
4461	CharacterRange::AddClassEscape(type: `'n'`, ranges: newline_ranges);
4462	RegExpCharacterClass* newline_atom =
4463	new RegExpCharacterClass (`'n'`, RegExpFlags ());
4464	TextNode* newline_matcher =
4465	new TextNode (newline_atom, /read_backwards=/false,
4466	ActionNode::PositiveSubmatchSuccess(
4467	stack_reg: stack_pointer_register, position_reg: position_register,
4468	clear_register_count: `0`, // No captures inside.
4469	clear_register_from: -`1`, // Ignored if no captures.
4470	on_success));
4471	// Create an end-of-input matcher.
4472	RegExpNode* end_of_line = ActionNode::BeginSubmatch(
4473	stack_reg: stack_pointer_register, position_reg: position_register, on_success: newline_matcher);
4474	// Add the two alternatives to the ChoiceNode.
4475	GuardedAlternative eol_alternative(end_of_line);
4476	result->AddAlternative(node: eol_alternative);
4477	GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
4478	result->AddAlternative(node: end_alternative);
4479	return result;
4480	}
4481	default:
4482	UNREACHABLE();
4483	}
4484	return on_success;
4485	}
4486
4487	RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
4488	RegExpNode* on_success) {
4489	return new (OZ) BackReferenceNode (RegExpCapture::StartRegister(index: index()),
4490	RegExpCapture::EndRegister(index: index()), flags_,
4491	compiler->read_backward(), on_success);
4492	}
4493
4494	RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
4495	RegExpNode* on_success) {
4496	return on_success;
4497	}
4498
4499	RegExpLookaround::Builder::Builder(bool is_positive,
4500	RegExpNode* on_success,
4501	intptr_t stack_pointer_register,
4502	intptr_t position_register,
4503	intptr_t capture_register_count,
4504	intptr_t capture_register_start)
4505	: is_positive_(is_positive),
4506	on_success_(on_success),
4507	stack_pointer_register_(stack_pointer_register),
4508	position_register_(position_register) {
4509	if (is_positive_) {
4510	on_match_success_ = ActionNode::PositiveSubmatchSuccess(
4511	stack_reg: stack_pointer_register, position_reg: position_register, clear_register_count: capture_register_count,
4512	clear_register_from: capture_register_start, on_success);
4513	} else {
4514	on_match_success_ = new (OZ) NegativeSubmatchSuccess (
4515	stack_pointer_register, position_register, capture_register_count,
4516	capture_register_start, OZ);
4517	}
4518	}
4519
4520	RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
4521	if (is_positive_) {
4522	return ActionNode::BeginSubmatch(stack_reg: stack_pointer_register_,
4523	position_reg: position_register_, on_success: match);
4524	} else {
4525	Zone* zone = on_success_->zone();
4526	// We use a ChoiceNode to represent the negative lookaround. The first
4527	// alternative is the negative match. On success, the end node backtracks.
4528	// On failure, the second alternative is tried and leads to success.
4529	// NegativeLookaroundChoiceNode is a special ChoiceNode that ignores the
4530	// first exit when calculating quick checks.
4531	ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode (
4532	GuardedAlternative (match), GuardedAlternative (on_success_), zone);
4533	return ActionNode::BeginSubmatch(stack_reg: stack_pointer_register_,
4534	position_reg: position_register_, on_success: choice_node);
4535	}
4536	}
4537
4538	RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
4539	RegExpNode* on_success) {
4540	intptr_t stack_pointer_register = compiler->AllocateRegister();
4541	intptr_t position_register = compiler->AllocateRegister();
4542
4543	const intptr_t registers_per_capture = `2`;
4544	const intptr_t register_of_first_capture = `2`;
4545	intptr_t register_count = capture_count_ * registers_per_capture;
4546	intptr_t register_start =
4547	register_of_first_capture + capture_from_ * registers_per_capture;
4548
4549	RegExpNode* result;
4550	bool was_reading_backward = compiler->read_backward();
4551	compiler->set_read_backward(type() == LOOKBEHIND);
4552	Builder builder(is_positive(), on_success, stack_pointer_register,
4553	position_register, register_count, register_start);
4554	RegExpNode* match = body_->ToNode(compiler, on_success: builder.on_match_success());
4555	result = builder.ForMatch(match);
4556	compiler->set_read_backward(was_reading_backward);
4557	return result;
4558	}
4559
4560	RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
4561	RegExpNode* on_success) {
4562	return ToNode(body: body(), index: index(), compiler, on_success);
4563	}
4564
4565	RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
4566	intptr_t index,
4567	RegExpCompiler* compiler,
4568	RegExpNode* on_success) {
4569	ASSERT(body != nullptr);
4570	intptr_t start_reg = RegExpCapture::StartRegister(index);
4571	intptr_t end_reg = RegExpCapture::EndRegister(index);
4572	if (compiler->read_backward()) {
4573	intptr_t tmp = end_reg;
4574	end_reg = start_reg;
4575	start_reg = tmp;
4576	}
4577	RegExpNode* store_end = ActionNode::StorePosition(reg: end_reg, is_capture: true, on_success);
4578	RegExpNode* body_node = body->ToNode(compiler, on_success: store_end);
4579	return ActionNode::StorePosition(reg: start_reg, is_capture: true, on_success: body_node);
4580	}
4581
4582	RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
4583	RegExpNode* on_success) {
4584	ZoneGrowableArray<RegExpTree> children = nodes();
4585	RegExpNode* current = on_success;
4586	if (compiler->read_backward()) {
4587	for (intptr_t i = `0`; i < children->length(); i++) {
4588	current = children->At(index: i)->ToNode(compiler, on_success: current);
4589	}
4590	} else {
4591	for (intptr_t i = children->length() - `1`; i >= `0`; i--) {
4592	current = children->At(index: i)->ToNode(compiler, on_success: current);
4593	}
4594	}
4595	return current;
4596	}
4597
4598	static void AddClass(const int32_t* elmv,
4599	intptr_t elmc,
4600	ZoneGrowableArray<CharacterRange>* ranges) {
4601	elmc--;
4602	ASSERT(elmv[elmc] == kRangeEndMarker);
4603	for (intptr_t i = `0`; i < elmc; i += `2`) {
4604	ASSERT(elmv[i] < elmv[i + `1`]);
4605	ranges->Add(value: CharacterRange (elmv[i], elmv[i + `1`] - `1`));
4606	}
4607	}
4608
4609	static void AddClassNegated(const int32_t* elmv,
4610	intptr_t elmc,
4611	ZoneGrowableArray<CharacterRange>* ranges) {
4612	elmc--;
4613	ASSERT(elmv[elmc] == kRangeEndMarker);
4614	ASSERT(elmv[`0`] != `0x0000`);
4615	ASSERT(elmv[elmc - `1`] != Utf::kMaxCodePoint);
4616	uint16_t last = `0x0000`;
4617	for (intptr_t i = `0`; i < elmc; i += `2`) {
4618	ASSERT(last <= elmv[i] - `1`);
4619	ASSERT(elmv[i] < elmv[i + `1`]);
4620	ranges->Add(value: CharacterRange (last, elmv[i] - `1`));
4621	last = elmv[i + `1`];
4622	}
4623	ranges->Add(value: CharacterRange (last, Utf::kMaxCodePoint));
4624	}
4625
4626	void CharacterRange::AddClassEscape(uint16_t type,
4627	ZoneGrowableArray<CharacterRange>* ranges,
4628	bool add_unicode_case_equivalents) {
4629	if (add_unicode_case_equivalents && (type == `'w'` \|\| type == `'W'`)) {
4630	// See #sec-runtime-semantics-wordcharacters-abstract-operation
4631	// In case of unicode and ignore_case, we need to create the closure over
4632	// case equivalent characters before negating.
4633	ZoneGrowableArray<CharacterRange>* new_ranges =
4634	new ZoneGrowableArray<CharacterRange>(`2`);
4635	AddClass(elmv: kWordRanges, elmc: kWordRangeCount, ranges: new_ranges);
4636	AddUnicodeCaseEquivalents(ranges: new_ranges);
4637	if (type == `'W'`) {
4638	ZoneGrowableArray<CharacterRange>* negated =
4639	new ZoneGrowableArray<CharacterRange>(`2`);
4640	CharacterRange::Negate(src: new_ranges, dst: negated);
4641	new_ranges = negated;
4642	}
4643	ranges->AddArray(src: *new_ranges);
4644	return;
4645	}
4646	AddClassEscape(type, ranges);
4647	}
4648
4649	void CharacterRange::AddClassEscape(uint16_t type,
4650	ZoneGrowableArray<CharacterRange>* ranges) {
4651	switch (type) {
4652	case `'s'`:
4653	AddClass(elmv: kSpaceRanges, elmc: kSpaceRangeCount, ranges);
4654	break;
4655	case `'S'`:
4656	AddClassNegated(elmv: kSpaceRanges, elmc: kSpaceRangeCount, ranges);
4657	break;
4658	case `'w'`:
4659	AddClass(elmv: kWordRanges, elmc: kWordRangeCount, ranges);
4660	break;
4661	case `'W'`:
4662	AddClassNegated(elmv: kWordRanges, elmc: kWordRangeCount, ranges);
4663	break;
4664	case `'d'`:
4665	AddClass(elmv: kDigitRanges, elmc: kDigitRangeCount, ranges);
4666	break;
4667	case `'D'`:
4668	AddClassNegated(elmv: kDigitRanges, elmc: kDigitRangeCount, ranges);
4669	break;
4670	case `'.'`:
4671	AddClassNegated(elmv: kLineTerminatorRanges, elmc: kLineTerminatorRangeCount, ranges);
4672	break;
4673	// This is not a character range as defined by the spec but a
4674	// convenient shorthand for a character class that matches any
4675	// character.
4676	case `'*'`:
4677	ranges->Add(value: CharacterRange::Everything());
4678	break;
4679	// This is the set of characters matched by the $ and ^ symbols
4680	// in multiline mode.
4681	case `'n'`:
4682	AddClass(elmv: kLineTerminatorRanges, elmc: kLineTerminatorRangeCount, ranges);
4683	break;
4684	default:
4685	UNREACHABLE();
4686	}
4687	}
4688
4689	void CharacterRange::AddCaseEquivalents(
4690	ZoneGrowableArray<CharacterRange>* ranges,
4691	bool is_one_byte,
4692	Zone* zone) {
4693	CharacterRange::Canonicalize(ranges);
4694	int range_count = ranges->length();
4695	for (intptr_t i = `0`; i < range_count; i++) {
4696	CharacterRange range = ranges->At(index: i);
4697	int32_t bottom = range.from();
4698	if (bottom > Utf16::kMaxCodeUnit) continue;
4699	int32_t top = Utils::Minimum(x: range.to(), y: Utf16::kMaxCodeUnit);
4700	// Nothing to be done for surrogates
4701	if (bottom >= Utf16::kLeadSurrogateStart &&
4702	top <= Utf16::kTrailSurrogateEnd) {
4703	continue;
4704	}
4705	if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
4706	if (bottom > Symbols::kMaxOneCharCodeSymbol) continue;
4707	if (top > Symbols::kMaxOneCharCodeSymbol) {
4708	top = Symbols::kMaxOneCharCodeSymbol;
4709	}
4710	}
4711
4712	unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
4713	unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;
4714	int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4715	if (top == bottom) {
4716	// If this is a singleton we just expand the one character.
4717	intptr_t length = jsregexp_uncanonicalize.get(c: bottom, n: `'\0'`, result: chars);
4718	for (intptr_t i = `0`; i < length; i++) {
4719	int32_t chr = chars[i];
4720	if (chr != bottom) {
4721	ranges->Add(value: CharacterRange::Singleton(value: chars[i]));
4722	}
4723	}
4724	} else {
4725	// If this is a range we expand the characters block by block,
4726	// expanding contiguous subranges (blocks) one at a time.
4727	// The approach is as follows. For a given start character we
4728	// look up the remainder of the block that contains it (represented
4729	// by the end point), for instance we find 'z' if the character
4730	// is 'c'. A block is characterized by the property
4731	// that all characters uncanonicalize in the same way, except that
4732	// each entry in the result is incremented by the distance from the first
4733	// element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A']
4734	// and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
4735	// Once we've found the end point we look up its uncanonicalization
4736	// and produce a range for each element. For instance for [c-f]
4737	// we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
4738	// add a range if it is not already contained in the input, so [c-f]
4739	// will be skipped but [C-F] will be added. If this range is not
4740	// completely contained in a block we do this for all the blocks
4741	// covered by the range (handling characters that is not in a block
4742	// as a "singleton block").
4743	int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4744	intptr_t pos = bottom;
4745	while (pos <= top) {
4746	intptr_t length = jsregexp_canonrange.get(c: pos, n: `'\0'`, result: range);
4747	int32_t block_end;
4748	if (length == `0`) {
4749	block_end = pos;
4750	} else {
4751	ASSERT(length == `1`);
4752	block_end = range[`0`];
4753	}
4754	intptr_t end = (block_end > top) ? top : block_end;
4755	length = jsregexp_uncanonicalize.get(c: block_end, n: `'\0'`, result: range);
4756	for (intptr_t i = `0`; i < length; i++) {
4757	int32_t c = range[i];
4758	int32_t range_from = c - (block_end - pos);
4759	int32_t range_to = c - (block_end - end);
4760	if (!(bottom <= range_from && range_to <= top)) {
4761	ranges->Add(value: CharacterRange (range_from, range_to));
4762	}
4763	}
4764	pos = end + `1`;
4765	}
4766	}
4767	}
4768	}
4769
4770	bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) {
4771	ASSERT(ranges != nullptr);
4772	intptr_t n = ranges->length();
4773	if (n <= `1`) return true;
4774	intptr_t max = ranges->At(index: `0`).to();
4775	for (intptr_t i = `1`; i < n; i++) {
4776	CharacterRange next_range = ranges->At(index: i);
4777	if (next_range.from() <= max + `1`) return false;
4778	max = next_range.to();
4779	}
4780	return true;
4781	}
4782
4783	ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() {
4784	if (ranges_ == nullptr) {
4785	ranges_ = new ZoneGrowableArray<CharacterRange>(`2`);
4786	CharacterRange::AddClassEscape(type: standard_set_type_, ranges: ranges_);
4787	}
4788	return ranges_;
4789	}
4790
4791	// Move a number of elements in a zone array to another position
4792	// in the same array. Handles overlapping source and target areas.
4793	static void MoveRanges(ZoneGrowableArray<CharacterRange>* list,
4794	intptr_t from,
4795	intptr_t to,
4796	intptr_t count) {
4797	// Ranges are potentially overlapping.
4798	if (from < to) {
4799	for (intptr_t i = count - `1`; i >= `0`; i--) {
4800	(*list)[to + i] = list->At(index: from + i);
4801	}
4802	} else {
4803	for (intptr_t i = `0`; i < count; i++) {
4804	(*list)[to + i] = list->At(index: from + i);
4805	}
4806	}
4807	}
4808
4809	static intptr_t InsertRangeInCanonicalList(
4810	ZoneGrowableArray<CharacterRange>* list,
4811	intptr_t count,
4812	CharacterRange insert) {
4813	// Inserts a range into list[0..count[, which must be sorted
4814	// by from value and non-overlapping and non-adjacent, using at most
4815	// list[0..count] for the result. Returns the number of resulting
4816	// canonicalized ranges. Inserting a range may collapse existing ranges into
4817	// fewer ranges, so the return value can be anything in the range 1..count+1.
4818	int32_t from = insert.from();
4819	int32_t to = insert.to();
4820	intptr_t start_pos = `0`;
4821	intptr_t end_pos = count;
4822	for (intptr_t i = count - `1`; i >= `0`; i--) {
4823	CharacterRange current = list->At(index: i);
4824	if (current.from() > to + `1`) {
4825	end_pos = i;
4826	} else if (current.to() + `1` < from) {
4827	start_pos = i + `1`;
4828	break;
4829	}
4830	}
4831
4832	// Inserted range overlaps, or is adjacent to, ranges at positions
4833	// [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
4834	// not affected by the insertion.
4835	// If start_pos == end_pos, the range must be inserted before start_pos.
4836	// if start_pos < end_pos, the entire range from start_pos to end_pos
4837	// must be merged with the insert range.
4838
4839	if (start_pos == end_pos) {
4840	// Insert between existing ranges at position start_pos.
4841	if (start_pos < count) {
4842	MoveRanges(list, from: start_pos, to: start_pos + `1`, count: count - start_pos);
4843	}
4844	(*list)[start_pos] = insert;
4845	return count + `1`;
4846	}
4847	if (start_pos + `1` == end_pos) {
4848	// Replace single existing range at position start_pos.
4849	CharacterRange to_replace = list->At(index: start_pos);
4850	intptr_t new_from = Utils::Minimum(x: to_replace.from(), y: from);
4851	intptr_t new_to = Utils::Maximum(x: to_replace.to(), y: to);
4852	(*list)[start_pos] = CharacterRange (new_from, new_to);
4853	return count;
4854	}
4855	// Replace a number of existing ranges from start_pos to end_pos - 1.
4856	// Move the remaining ranges down.
4857
4858	intptr_t new_from = Utils::Minimum(x: list->At(index: start_pos).from(), y: from);
4859	intptr_t new_to = Utils::Maximum(x: list->At(index: end_pos - `1`).to(), y: to);
4860	if (end_pos < count) {
4861	MoveRanges(list, from: end_pos, to: start_pos + `1`, count: count - end_pos);
4862	}
4863	(*list)[start_pos] = CharacterRange (new_from, new_to);
4864	return count - (end_pos - start_pos) + `1`;
4865	}
4866
4867	void CharacterSet::Canonicalize() {
4868	// Special/default classes are always considered canonical. The result
4869	// of calling ranges() will be sorted.
4870	if (ranges_ == nullptr) return;
4871	CharacterRange::Canonicalize(ranges: ranges_);
4872	}
4873
4874	void CharacterRange::Canonicalize(
4875	ZoneGrowableArray<CharacterRange>* character_ranges) {
4876	if (character_ranges->length() <= `1`) return;
4877	// Check whether ranges are already canonical (increasing, non-overlapping,
4878	// non-adjacent).
4879	intptr_t n = character_ranges->length();
4880	intptr_t max = character_ranges->At(index: `0`).to();
4881	intptr_t i = `1`;
4882	while (i < n) {
4883	CharacterRange current = character_ranges->At(index: i);
4884	if (current.from() <= max + `1`) {
4885	break;
4886	}
4887	max = current.to();
4888	i++;
4889	}
4890	// Canonical until the i'th range. If that's all of them, we are done.
4891	if (i == n) return;
4892
4893	// The ranges at index i and forward are not canonicalized. Make them so by
4894	// doing the equivalent of insertion sort (inserting each into the previous
4895	// list, in order).
4896	// Notice that inserting a range can reduce the number of ranges in the
4897	// result due to combining of adjacent and overlapping ranges.
4898	intptr_t read = i; // Range to insert.
4899	intptr_t num_canonical = i; // Length of canonicalized part of list.
4900	do {
4901	num_canonical = InsertRangeInCanonicalList(list: character_ranges, count: num_canonical,
4902	insert: character_ranges->At(index: read));
4903	read++;
4904	} while (read < n);
4905	character_ranges->TruncateTo(length: num_canonical);
4906
4907	ASSERT(CharacterRange::IsCanonical(character_ranges));
4908	}
4909
4910	void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges,
4911	ZoneGrowableArray<CharacterRange>* negated_ranges) {
4912	ASSERT(CharacterRange::IsCanonical(ranges));
4913	ASSERT(negated_ranges->length() == `0`);
4914	intptr_t range_count = ranges->length();
4915	uint32_t from = `0`;
4916	intptr_t i = `0`;
4917	if (range_count > `0` && ranges->At(index: `0`).from() == `0`) {
4918	from = ranges->At(index: `0`).to();
4919	i = `1`;
4920	}
4921	while (i < range_count) {
4922	CharacterRange range = ranges->At(index: i);
4923	negated_ranges->Add(value: CharacterRange (from + `1`, range.from() - `1`));
4924	from = range.to();
4925	i++;
4926	}
4927	if (from < Utf::kMaxCodePoint) {
4928	negated_ranges->Add(value: CharacterRange (from + `1`, Utf::kMaxCodePoint));
4929	}
4930	}
4931
4932	// -------------------------------------------------------------------
4933	// Splay tree
4934
4935	// Workaround for the fact that ZoneGrowableArray does not have contains().
4936	static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) {
4937	for (intptr_t i = `0`; i < array->length(); i++) {
4938	if (array->At(index: i) == value) {
4939	return true;
4940	}
4941	}
4942	return false;
4943	}
4944
4945	OutSet* OutSet::Extend(unsigned value, Zone* zone) {
4946	if (Get(value)) return this;
4947	if (successors() != nullptr) {
4948	for (int i = `0`; i < successors()->length(); i++) {
4949	OutSet* successor = successors()->At(index: i);
4950	if (successor->Get(value)) return successor;
4951	}
4952	} else {
4953	successors_ = new (zone) ZoneGrowableArray<OutSet*>(`2`);
4954	}
4955	OutSet* result = new (zone) OutSet (first_, remaining_);
4956	result->Set(value, zone);
4957	successors()->Add(value: result);
4958	return result;
4959	}
4960
4961	void OutSet::Set(unsigned value, Zone* zone) {
4962	if (value < kFirstLimit) {
4963	first_ \|= (`1` << value);
4964	} else {
4965	if (remaining_ == nullptr)
4966	remaining_ = new (zone) ZoneGrowableArray<unsigned>(`1`);
4967
4968	bool remaining_contains_value = ArrayContains(array: remaining_, value);
4969	if (remaining_->is_empty() \|\| !remaining_contains_value) {
4970	remaining_->Add(value);
4971	}
4972	}
4973	}
4974
4975	bool OutSet::Get(unsigned value) const {
4976	if (value < kFirstLimit) {
4977	return (first_ & (`1` << value)) != `0`;
4978	} else if (remaining_ == nullptr) {
4979	return false;
4980	} else {
4981	return ArrayContains(array: remaining_, value);
4982	}
4983	}
4984
4985	const int32_t ChoiceTable::Config::kNoKey = Utf::kInvalidChar;
4986
4987	void ChoiceTable::AddRange(CharacterRange full_range,
4988	int32_t value,
4989	Zone* zone) {
4990	CharacterRange current = full_range;
4991	if (tree()->is_empty()) {
4992	// If this is the first range we just insert into the table.
4993	ZoneSplayTree<Config>::Locator loc;
4994	bool inserted = tree()->Insert(key: current.from(), locator: &loc);
4995	ASSERT(inserted);
4996	USE(inserted);
4997	loc.set_value(
4998	Entry (current.from(), current.to(), empty()->Extend(value, zone)));
4999	return;
5000	}
5001	// First see if there is a range to the left of this one that
5002	// overlaps.
5003	ZoneSplayTree<Config>::Locator loc;
5004	if (tree()->FindGreatestLessThan(key: current.from(), locator: &loc)) {
5005	Entry* entry = &loc.value();
5006	// If we've found a range that overlaps with this one, and it
5007	// starts strictly to the left of this one, we have to fix it
5008	// because the following code only handles ranges that start on
5009	// or after the start point of the range we're adding.
5010	if (entry->from() < current.from() && entry->to() >= current.from()) {
5011	// Snap the overlapping range in half around the start point of
5012	// the range we're adding.
5013	CharacterRange left =
5014	CharacterRange::Range(from: entry->from(), to: current.from() - `1`);
5015	CharacterRange right = CharacterRange::Range(from: current.from(), to: entry->to());
5016	// The left part of the overlapping range doesn't overlap.
5017	// Truncate the whole entry to be just the left part.
5018	entry->set_to(left.to());
5019	// The right part is the one that overlaps. We add this part
5020	// to the map and let the next step deal with merging it with
5021	// the range we're adding.
5022	ZoneSplayTree<Config>::Locator loc;
5023	bool inserted = tree()->Insert(key: right.from(), locator: &loc);
5024	ASSERT(inserted);
5025	USE(inserted);
5026	loc.set_value(Entry (right.from(), right.to(), entry->out_set()));
5027	}
5028	}
5029	while (current.is_valid()) {
5030	if (tree()->FindLeastGreaterThan(key: current.from(), locator: &loc) &&
5031	(loc.value().from() <= current.to()) &&
5032	(loc.value().to() >= current.from())) {
5033	Entry* entry = &loc.value();
5034	// We have overlap. If there is space between the start point of
5035	// the range we're adding and where the overlapping range starts
5036	// then we have to add a range covering just that space.
5037	if (current.from() < entry->from()) {
5038	ZoneSplayTree<Config>::Locator ins;
5039	bool inserted = tree()->Insert(key: current.from(), locator: &ins);
5040	ASSERT(inserted);
5041	USE(inserted);
5042	ins.set_value(Entry (current.from(), entry->from() - `1`,
5043	empty()->Extend(value, zone)));
5044	current.set_from(entry->from());
5045	}
5046	ASSERT(current.from() == entry->from());
5047	// If the overlapping range extends beyond the one we want to add
5048	// we have to snap the right part off and add it separately.
5049	if (entry->to() > current.to()) {
5050	ZoneSplayTree<Config>::Locator ins;
5051	bool inserted = tree()->Insert(key: current.to() + `1`, locator: &ins);
5052	ASSERT(inserted);
5053	USE(inserted);
5054	ins.set_value(Entry (current.to() + `1`, entry->to(), entry->out_set()));
5055	entry->set_to(current.to());
5056	}
5057	ASSERT(entry->to() <= current.to());
5058	// The overlapping range is now completely contained by the range
5059	// we're adding so we can just update it and move the start point
5060	// of the range we're adding just past it.
5061	entry->AddValue(value, zone);
5062	ASSERT(entry->to() + `1` > current.from());
5063	current.set_from(entry->to() + `1`);
5064	} else {
5065	// There is no overlap so we can just add the range
5066	ZoneSplayTree<Config>::Locator ins;
5067	bool inserted = tree()->Insert(key: current.from(), locator: &ins);
5068	ASSERT(inserted);
5069	USE(inserted);
5070	ins.set_value(
5071	Entry (current.from(), current.to(), empty()->Extend(value, zone)));
5072	break;
5073	}
5074	}
5075	}
5076
5077	OutSet* ChoiceTable::Get(int32_t value) {
5078	ZoneSplayTree<Config>::Locator loc;
5079	if (!tree()->FindGreatestLessThan(key: value, locator: &loc)) return empty();
5080	Entry* entry = &loc.value();
5081	if (value <= entry->to())
5082	return entry->out_set();
5083	else
5084	return empty();
5085	}
5086
5087	// -------------------------------------------------------------------
5088	// Analysis
5089
5090	void Analysis::EnsureAnalyzed(RegExpNode* that) {
5091	if (that->info()->been_analyzed \|\| that->info()->being_analyzed) return;
5092	that->info()->being_analyzed = true;
5093	that->Accept(visitor: this);
5094	that->info()->being_analyzed = false;
5095	that->info()->been_analyzed = true;
5096	}
5097
5098	void Analysis::VisitEnd(EndNode* that) {
5099	// nothing to do
5100	}
5101
5102	void TextNode::CalculateOffsets() {
5103	intptr_t element_count = elements()->length();
5104	// Set up the offsets of the elements relative to the start. This is a fixed
5105	// quantity since a TextNode can only contain fixed-width things.
5106	intptr_t cp_offset = `0`;
5107	for (intptr_t i = `0`; i < element_count; i++) {
5108	TextElement& elm = (*elements())[i];
5109	elm.set_cp_offset(cp_offset);
5110	cp_offset += elm.length();
5111	}
5112	}
5113
5114	void Analysis::VisitText(TextNode* that) {
5115	that->MakeCaseIndependent(is_one_byte: is_one_byte_);
5116	EnsureAnalyzed(that: that->on_success());
5117	if (!has_failed()) {
5118	that->CalculateOffsets();
5119	}
5120	}
5121
5122	void Analysis::VisitAction(ActionNode* that) {
5123	RegExpNode* target = that->on_success();
5124	EnsureAnalyzed(that: target);
5125	if (!has_failed()) {
5126	// If the next node is interested in what it follows then this node
5127	// has to be interested too so it can pass the information on.
5128	that->info()->AddFromFollowing(that: target->info());
5129	}
5130	}
5131
5132	void Analysis::VisitChoice(ChoiceNode* that) {
5133	NodeInfo* info = that->info();
5134	for (intptr_t i = `0`; i < that->alternatives()->length(); i++) {
5135	RegExpNode* node = (*that->alternatives())[i].node();
5136	EnsureAnalyzed(that: node);
5137	if (has_failed()) return;
5138	// Anything the following nodes need to know has to be known by
5139	// this node also, so it can pass it on.
5140	info->AddFromFollowing(that: node->info());
5141	}
5142	}
5143
5144	void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
5145	NodeInfo* info = that->info();
5146	for (intptr_t i = `0`; i < that->alternatives()->length(); i++) {
5147	RegExpNode* node = (*that->alternatives())[i].node();
5148	if (node != that->loop_node()) {
5149	EnsureAnalyzed(that: node);
5150	if (has_failed()) return;
5151	info->AddFromFollowing(that: node->info());
5152	}
5153	}
5154	// Check the loop last since it may need the value of this node
5155	// to get a correct result.
5156	EnsureAnalyzed(that: that->loop_node());
5157	if (!has_failed()) {
5158	info->AddFromFollowing(that: that->loop_node()->info());
5159	}
5160	}
5161
5162	void Analysis::VisitBackReference(BackReferenceNode* that) {
5163	EnsureAnalyzed(that: that->on_success());
5164	}
5165
5166	void Analysis::VisitAssertion(AssertionNode* that) {
5167	EnsureAnalyzed(that: that->on_success());
5168	}
5169
5170	void BackReferenceNode::FillInBMInfo(intptr_t offset,
5171	intptr_t budget,
5172	BoyerMooreLookahead* bm,
5173	bool not_at_start) {
5174	// Working out the set of characters that a backreference can match is too
5175	// hard, so we just say that any character can match.
5176	bm->SetRest(offset);
5177	SaveBMInfo(bm, not_at_start, offset);
5178	}
5179
5180	COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize ==
5181	RegExpMacroAssembler::kTableSize);
5182
5183	void ChoiceNode::FillInBMInfo(intptr_t offset,
5184	intptr_t budget,
5185	BoyerMooreLookahead* bm,
5186	bool not_at_start) {
5187	ZoneGrowableArray<GuardedAlternative>* alts = alternatives();
5188	budget = (budget - `1`) / alts->length();
5189	for (intptr_t i = `0`; i < alts->length(); i++) {
5190	GuardedAlternative& alt = (*alts)[i];
5191	if (alt.guards() != nullptr && alt.guards()->length() != `0`) {
5192	bm->SetRest(offset); // Give up trying to fill in info.
5193	SaveBMInfo(bm, not_at_start, offset);
5194	return;
5195	}
5196	alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
5197	}
5198	SaveBMInfo(bm, not_at_start, offset);
5199	}
5200
5201	void TextNode::FillInBMInfo(intptr_t initial_offset,
5202	intptr_t budget,
5203	BoyerMooreLookahead* bm,
5204	bool not_at_start) {
5205	if (initial_offset >= bm->length()) return;
5206	intptr_t offset = initial_offset;
5207	intptr_t max_char = bm->max_char();
5208	for (intptr_t i = `0`; i < elements()->length(); i++) {
5209	if (offset >= bm->length()) {
5210	if (initial_offset == `0`) set_bm_info(not_at_start, bm);
5211	return;
5212	}
5213	TextElement text = elements()->At(index: i);
5214	if (text.text_type() == TextElement::ATOM) {
5215	RegExpAtom* atom = text.atom();
5216	for (intptr_t j = `0`; j < atom->length(); j++, offset++) {
5217	if (offset >= bm->length()) {
5218	if (initial_offset == `0`) set_bm_info(not_at_start, bm);
5219	return;
5220	}
5221	uint16_t character = atom->data()->At(index: j);
5222	if (atom->flags().IgnoreCase()) {
5223	int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5224	intptr_t length = GetCaseIndependentLetters(
5225	character, one_byte_subject: bm->max_char() == Symbols::kMaxOneCharCodeSymbol,
5226	letters: chars);
5227	for (intptr_t j = `0`; j < length; j++) {
5228	bm->Set(map_number: offset, character: chars[j]);
5229	}
5230	} else {
5231	if (character <= max_char) bm->Set(map_number: offset, character);
5232	}
5233	}
5234	} else {
5235	ASSERT(text.text_type() == TextElement::CHAR_CLASS);
5236	RegExpCharacterClass* char_class = text.char_class();
5237	ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges();
5238	if (char_class->is_negated()) {
5239	bm->SetAll(offset);
5240	} else {
5241	for (intptr_t k = `0`; k < ranges->length(); k++) {
5242	const CharacterRange& range = ranges->At(index: k);
5243	if (range.from() > max_char) continue;
5244	intptr_t to =
5245	Utils::Minimum(x: max_char, y: static_cast<intptr_t>(range.to()));
5246	bm->SetInterval(map_number: offset, interval: Interval (range.from(), to));
5247	}
5248	}
5249	offset++;
5250	}
5251	}
5252	if (offset >= bm->length()) {
5253	if (initial_offset == `0`) set_bm_info(not_at_start, bm);
5254	return;
5255	}
5256	on_success()->FillInBMInfo(offset, budget: budget - `1`, bm,
5257	not_at_start: true); // Not at start after a text node.
5258	if (initial_offset == `0`) set_bm_info(not_at_start, bm);
5259	}
5260
5261	RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
5262	RegExpNode* on_success,
5263	RegExpFlags flags) {
5264	// If the regexp matching starts within a surrogate pair, step back
5265	// to the lead surrogate and start matching from there.
5266	ASSERT(!compiler->read_backward());
5267	Zone* zone = compiler->zone();
5268
5269	auto lead_surrogates = CharacterRange::List(
5270	zone: on_success->zone(), range: CharacterRange::Range(from: Utf16::kLeadSurrogateStart,
5271	to: Utf16::kLeadSurrogateEnd));
5272	auto trail_surrogates = CharacterRange::List(
5273	zone: on_success->zone(), range: CharacterRange::Range(from: Utf16::kTrailSurrogateStart,
5274	to: Utf16::kTrailSurrogateEnd));
5275
5276	ChoiceNode* optional_step_back = new (zone) ChoiceNode (`2`, zone);
5277
5278	int stack_register = compiler->UnicodeLookaroundStackRegister();
5279	int position_register = compiler->UnicodeLookaroundPositionRegister();
5280	RegExpNode* step_back = TextNode::CreateForCharacterRanges(
5281	ranges: lead_surrogates, /read_backward=/true, on_success, flags);
5282	RegExpLookaround::Builder builder(/is_positive=/true, step_back,
5283	stack_register, position_register);
5284	RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
5285	ranges: trail_surrogates, /read_backward=/false, on_success: builder.on_match_success(),
5286	flags);
5287
5288	optional_step_back->AddAlternative(
5289	node: GuardedAlternative (builder.ForMatch(match: match_trail)));
5290	optional_step_back->AddAlternative(node: GuardedAlternative (on_success));
5291
5292	return optional_step_back;
5293	}
5294
5295	#if !defined(DART_PRECOMPILED_RUNTIME)
5296	RegExpEngine::CompilationResult RegExpEngine::CompileIR(
5297	RegExpCompileData* data,
5298	const ParsedFunction* parsed_function,
5299	const ZoneGrowableArray<const ICData*>& ic_data_array,
5300	intptr_t osr_id) {
5301	ASSERT(!FLAG_interpret_irregexp);
5302	Zone* zone = Thread::Current()->zone();
5303
5304	const Function& function = parsed_function->function();
5305	const intptr_t specialization_cid = function.string_specialization_cid();
5306	const bool is_sticky = function.is_sticky_specialization();
5307	const bool is_one_byte = (specialization_cid == kOneByteStringCid \|\|
5308	specialization_cid == kExternalOneByteStringCid);
5309	RegExp& regexp = RegExp::Handle(zone, ptr: function.regexp());
5310	const String& pattern = String::Handle(zone, ptr: regexp.pattern());
5311
5312	ASSERT(!regexp.IsNull());
5313	ASSERT(!pattern.IsNull());
5314
5315	const bool is_global = regexp.flags().IsGlobal();
5316	const bool is_unicode = regexp.flags().IsUnicode();
5317
5318	RegExpCompiler compiler(data->capture_count, is_one_byte);
5319
5320	// TODO(zerny): Frequency sampling is currently disabled because of several
5321	// issues. We do not want to store subject strings in the regexp object since
5322	// they might be long and we should not prevent their garbage collection.
5323	// Passing them to this function explicitly does not help, since we must
5324	// generate exactly the same IR for both the unoptimizing and optimizing
5325	// pipelines (otherwise it gets confused when i.e. deopt id's differ).
5326	// An option would be to store sampling results in the regexp object, but
5327	// I'm not sure the performance gains are relevant enough.
5328
5329	// Wrap the body of the regexp in capture #0.
5330	RegExpNode* captured_body =
5331	RegExpCapture::ToNode(body: data->tree, index: `0`, compiler: &compiler, on_success: compiler.accept());
5332
5333	RegExpNode* node = captured_body;
5334	const bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5335	const bool is_start_anchored = data->tree->IsAnchoredAtStart();
5336	intptr_t max_length = data->tree->max_match();
5337	if (!is_start_anchored && !is_sticky) {
5338	// Add a .? at the beginning, outside the body capture, unless*
5339	// this expression is anchored at the beginning or is sticky.
5340	RegExpNode* loop_node = RegExpQuantifier::ToNode(
5341	min: `0`, max: RegExpTree::kInfinity, is_greedy: false,
5342	body: new (zone) RegExpCharacterClass (`'*'`, RegExpFlags ()), compiler: &compiler,
5343	on_success: captured_body, not_at_start: data->contains_anchor);
5344
5345	if (data->contains_anchor) {
5346	// Unroll loop once, to take care of the case that might start
5347	// at the start of input.
5348	ChoiceNode* first_step_node = new (zone) ChoiceNode (`2`, zone);
5349	first_step_node->AddAlternative(node: GuardedAlternative (captured_body));
5350	first_step_node->AddAlternative(node: GuardedAlternative (new (zone) TextNode (
5351	new (zone) RegExpCharacterClass (`'*'`, RegExpFlags ()),
5352	/read_backwards=/false, loop_node)));
5353	node = first_step_node;
5354	} else {
5355	node = loop_node;
5356	}
5357	}
5358	if (is_one_byte) {
5359	node = node->FilterOneByte(depth: RegExpCompiler::kMaxRecursion);
5360	// Do it again to propagate the new nodes to places where they were not
5361	// put because they had not been calculated yet.
5362	if (node != nullptr) {
5363	node = node->FilterOneByte(depth: RegExpCompiler::kMaxRecursion);
5364	}
5365	} else if (is_unicode && (is_global \|\| is_sticky)) {
5366	node = OptionallyStepBackToLeadSurrogate(compiler: &compiler, on_success: node, flags: regexp.flags());
5367	}
5368
5369	if (node == nullptr) node = new (zone) EndNode (EndNode::BACKTRACK, zone);
5370	data->node = node;
5371	Analysis analysis(is_one_byte);
5372	analysis.EnsureAnalyzed(that: node);
5373	if (analysis.has_failed()) {
5374	const char* error_message = analysis.error_message();
5375	return CompilationResult (error_message);
5376	}
5377
5378	// Native regexp implementation.
5379
5380	IRRegExpMacroAssembler* macro_assembler = new (zone)
5381	IRRegExpMacroAssembler (specialization_cid, data->capture_count,
5382	parsed_function, ic_data_array, osr_id, zone);
5383
5384	// Inserted here, instead of in Assembler, because it depends on information
5385	// in the AST that isn't replicated in the Node structure.
5386	const intptr_t kMaxBacksearchLimit = `1024`;
5387	if (is_end_anchored && !is_start_anchored && !is_sticky &&
5388	max_length < kMaxBacksearchLimit) {
5389	macro_assembler->SetCurrentPositionFromEnd(max_length);
5390	}
5391
5392	if (is_global) {
5393	RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
5394	if (data->tree->min_match() > `0`) {
5395	mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
5396	} else if (is_unicode) {
5397	mode = RegExpMacroAssembler::GLOBAL_UNICODE;
5398	}
5399	macro_assembler->set_global_mode(mode);
5400	}
5401
5402	RegExpEngine::CompilationResult result =
5403	compiler.Assemble(macro_assembler, start: node, capture_count: data->capture_count, pattern);
5404
5405	if (FLAG_trace_irregexp) {
5406	macro_assembler->PrintBlocks();
5407	}
5408
5409	return result;
5410	}
5411	#endif // !defined(DART_PRECOMPILED_RUNTIME)
5412
5413	RegExpEngine::CompilationResult RegExpEngine::CompileBytecode(
5414	RegExpCompileData* data,
5415	const RegExp& regexp,
5416	bool is_one_byte,
5417	bool is_sticky,
5418	Zone* zone) {
5419	ASSERT(FLAG_interpret_irregexp);
5420	const String& pattern = String::Handle(zone, ptr: regexp.pattern());
5421
5422	ASSERT(!regexp.IsNull());
5423	ASSERT(!pattern.IsNull());
5424
5425	const bool is_global = regexp.flags().IsGlobal();
5426	const bool is_unicode = regexp.flags().IsUnicode();
5427
5428	RegExpCompiler compiler(data->capture_count, is_one_byte);
5429
5430	// TODO(zerny): Frequency sampling is currently disabled because of several
5431	// issues. We do not want to store subject strings in the regexp object since
5432	// they might be long and we should not prevent their garbage collection.
5433	// Passing them to this function explicitly does not help, since we must
5434	// generate exactly the same IR for both the unoptimizing and optimizing
5435	// pipelines (otherwise it gets confused when i.e. deopt id's differ).
5436	// An option would be to store sampling results in the regexp object, but
5437	// I'm not sure the performance gains are relevant enough.
5438
5439	// Wrap the body of the regexp in capture #0.
5440	RegExpNode* captured_body =
5441	RegExpCapture::ToNode(body: data->tree, index: `0`, compiler: &compiler, on_success: compiler.accept());
5442
5443	RegExpNode* node = captured_body;
5444	bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5445	bool is_start_anchored = data->tree->IsAnchoredAtStart();
5446	intptr_t max_length = data->tree->max_match();
5447	if (!is_start_anchored && !is_sticky) {
5448	// Add a .? at the beginning, outside the body capture, unless*
5449	// this expression is anchored at the beginning.
5450	RegExpNode* loop_node = RegExpQuantifier::ToNode(
5451	min: `0`, max: RegExpTree::kInfinity, is_greedy: false,
5452	body: new (zone) RegExpCharacterClass (`'*'`, RegExpFlags ()), compiler: &compiler,
5453	on_success: captured_body, not_at_start: data->contains_anchor);
5454
5455	if (data->contains_anchor) {
5456	// Unroll loop once, to take care of the case that might start
5457	// at the start of input.
5458	ChoiceNode* first_step_node = new (zone) ChoiceNode (`2`, zone);
5459	first_step_node->AddAlternative(node: GuardedAlternative (captured_body));
5460	first_step_node->AddAlternative(node: GuardedAlternative (new (zone) TextNode (
5461	new (zone) RegExpCharacterClass (`'*'`, RegExpFlags ()),
5462	/read_backwards=/false, loop_node)));
5463	node = first_step_node;
5464	} else {
5465	node = loop_node;
5466	}
5467	}
5468	if (is_one_byte) {
5469	node = node->FilterOneByte(depth: RegExpCompiler::kMaxRecursion);
5470	// Do it again to propagate the new nodes to places where they were not
5471	// put because they had not been calculated yet.
5472	if (node != nullptr) {
5473	node = node->FilterOneByte(depth: RegExpCompiler::kMaxRecursion);
5474	}
5475	} else if (is_unicode && (is_global \|\| is_sticky)) {
5476	node = OptionallyStepBackToLeadSurrogate(compiler: &compiler, on_success: node, flags: regexp.flags());
5477	}
5478
5479	if (node == nullptr) node = new (zone) EndNode (EndNode::BACKTRACK, zone);
5480	data->node = node;
5481	Analysis analysis(is_one_byte);
5482	analysis.EnsureAnalyzed(that: node);
5483	if (analysis.has_failed()) {
5484	const char* error_message = analysis.error_message();
5485	return CompilationResult (error_message);
5486	}
5487
5488	// Bytecode regexp implementation.
5489
5490	ZoneGrowableArray<uint8_t> buffer(zone, `1024`);
5491	BytecodeRegExpMacroAssembler* macro_assembler =
5492	new (zone) BytecodeRegExpMacroAssembler (&buffer, zone);
5493
5494	// Inserted here, instead of in Assembler, because it depends on information
5495	// in the AST that isn't replicated in the Node structure.
5496	const intptr_t kMaxBacksearchLimit = `1024`;
5497	if (is_end_anchored && !is_start_anchored && !is_sticky &&
5498	max_length < kMaxBacksearchLimit) {
5499	macro_assembler->SetCurrentPositionFromEnd(max_length);
5500	}
5501
5502	if (is_global) {
5503	RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
5504	if (data->tree->min_match() > `0`) {
5505	mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
5506	} else if (is_unicode) {
5507	mode = RegExpMacroAssembler::GLOBAL_UNICODE;
5508	}
5509	macro_assembler->set_global_mode(mode);
5510	}
5511
5512	RegExpEngine::CompilationResult result =
5513	compiler.Assemble(macro_assembler, start: node, capture_count: data->capture_count, pattern);
5514
5515	if (FLAG_trace_irregexp) {
5516	macro_assembler->PrintBlocks();
5517	}
5518
5519	return result;
5520	}
5521
5522	void CreateSpecializedFunction(Thread* thread,
5523	Zone* zone,
5524	const RegExp& regexp,
5525	intptr_t specialization_cid,
5526	bool sticky,
5527	const Object& owner) {
5528	const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
5529
5530	const FunctionType& signature =
5531	FunctionType::Handle(zone, ptr: FunctionType::New());
5532	const String& pattern = String::Handle(zone, ptr: regexp.pattern());
5533	Function& fn =
5534	Function::Handle(zone, ptr: Function::New(signature, name: pattern,
5535	kind: UntaggedFunction::kIrregexpFunction,
5536	is_static: true, // Static.
5537	is_const: false, // Not const.
5538	is_abstract: false, // Not abstract.
5539	is_external: false, // Not external.
5540	is_native: false, // Not native.
5541	owner, token_pos: TokenPosition::kMinSource));
5542
5543	// TODO(zerny): Share these arrays between all irregexp functions.
5544	// TODO(regis): Better, share a common signature.
5545	signature.set_num_fixed_parameters(kParamCount);
5546	signature.set_parameter_types(
5547	Array::Handle(zone, ptr: Array::New(len: kParamCount, space: Heap::kOld)));
5548	fn.CreateNameArray();
5549	signature.SetParameterTypeAt(index: RegExpMacroAssembler::kParamRegExpIndex,
5550	value: Object::dynamic_type());
5551	fn.SetParameterNameAt(index: RegExpMacroAssembler::kParamRegExpIndex,
5552	value: Symbols::This());
5553	signature.SetParameterTypeAt(index: RegExpMacroAssembler::kParamStringIndex,
5554	value: Object::dynamic_type());
5555	fn.SetParameterNameAt(index: RegExpMacroAssembler::kParamStringIndex,
5556	value: Symbols::string_param());
5557	signature.SetParameterTypeAt(index: RegExpMacroAssembler::kParamStartOffsetIndex,
5558	value: Object::dynamic_type());
5559	fn.SetParameterNameAt(index: RegExpMacroAssembler::kParamStartOffsetIndex,
5560	value: Symbols::start_index_param());
5561	signature.set_result_type(Type::Handle(zone, ptr: Type::ArrayType()));
5562
5563	// Cache the result.
5564	regexp.set_function(cid: specialization_cid, sticky, value: fn);
5565
5566	fn.SetRegExpData(regexp, string_specialization_cid: specialization_cid, sticky);
5567	fn.set_is_debuggable(false);
5568
5569	// The function is compiled lazily during the first call.
5570	}
5571
5572	RegExpPtr RegExpEngine::CreateRegExp(Thread* thread,
5573	const String& pattern,
5574	RegExpFlags flags) {
5575	Zone* zone = thread->zone();
5576	const RegExp& regexp = RegExp::Handle(ptr: RegExp::New(zone));
5577
5578	regexp.set_pattern(pattern);
5579	regexp.set_flags(flags);
5580
5581	// TODO(zerny): We might want to use normal string searching algorithms
5582	// for simple patterns.
5583	regexp.set_is_complex();
5584	regexp.set_is_global(); // All dart regexps are global.
5585
5586	if (!FLAG_interpret_irregexp) {
5587	const Library& lib = Library::Handle(zone, ptr: Library::CoreLibrary());
5588	const Class& owner =
5589	Class::Handle(zone, ptr: lib.LookupClass(name: Symbols::RegExp()));
5590
5591	for (intptr_t cid = kOneByteStringCid; cid <= kExternalTwoByteStringCid;
5592	cid++) {
5593	CreateSpecializedFunction(thread, zone, regexp, specialization_cid: cid, /sticky=/false,
5594	owner);
5595	CreateSpecializedFunction(thread, zone, regexp, specialization_cid: cid, /sticky=/true,
5596	owner);
5597	}
5598	}
5599
5600	return regexp.ptr();
5601	}
5602
5603	} // namespace dart
5604

source code of flutter_engine/third_party/dart/runtime/vm/regexp.cc