// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2003-2016 International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.dev.test.rbbi;


import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

// Monkey testing of RuleBasedBreakIterator.
//    The old monkey test, now using regexes generated by the Unicode tools.
//    The new monkey test is class RBBIMonkeyTest.

import com.ibm.icu.dev.test.CoreTestFmwk;
import com.ibm.icu.dev.test.rbbi.SegmentationRule.BreakContext;
import com.ibm.icu.dev.test.rbbi.SegmentationRule.Resolution;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;


/**
 * Monkey tests for RBBI.  These tests have independent implementations of
 * the Unicode TR boundary rules, and compare results between these and ICU's
 * implementation, using random data.
 *
 * Tests cover Grapheme Cluster (char), Word and Line breaks
 *
 * Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp
 *
 */
@RunWith(JUnit4.class)
public class RBBITestMonkey extends CoreTestFmwk {
    //
    //     class RBBIMonkeyKind
    //
    //        Monkey Test for Break Iteration
    //        Abstract interface class.   Concrete derived classes independently
    //        implement the break rules for different iterator types.
    //
    //        The Monkey Test itself doesn't know which type of break iterator it is
    //        testing, but works purely in terms of the interface defined here.
    //
    abstract static class RBBIMonkeyKind {

        // A character class paired with its display name; the concrete monkeys
        // collect these while building their partitions.
        static class NamedSet {
            String name;
            UnicodeSet set;

            NamedSet(String name, UnicodeSet set) {
                this.name = name;
                this.set = set;
            }

            // Convenience form taking the set as a UnicodeSet pattern string.
            NamedSet(String name, String pattern) {
                this(name, new UnicodeSet(pattern));
            }
        }

        // One of the UProperty constants. Its value is shown for the characters
        //   surrounding a test failure.
        int   fCharProperty;

        // Character classes and, in the same order, their names.
        List<UnicodeSet> sets = new ArrayList<>();
        ArrayList<String> classNames = new ArrayList<>();

        // Left unset here; each concrete subclass installs its own rule list.
        List<SegmentationRule> rules;

        UnicodeSet dictionarySet = new UnicodeSet();

        // Name of the rule applied at each position of the current text
        // (indices 0 .. text length, inclusive).
        private ArrayList<String> fAppliedRules = new ArrayList<>();

        // The text most recently passed to setText().
        private StringBuffer text;

        // Per-position outcome of running the rules over the current text.
        private SegmentationRule.BreakContext[] resolved;

        RBBIMonkeyKind() {
            // Nothing to do: the field initializers above set up all shared state.
        }

        // The UnicodeSets representing the character classes used by this
        //   type of iterator.
        List<UnicodeSet>  charClasses() {
            return sets;
        }

        // Install the test text that later calls to next() operate on, and run
        // every rule over it, in list order, to resolve each position.
        // Throws IllegalArgumentException if a position is left without an
        // applied rule (positions between the two halves of a surrogate pair
        // are exempt).
        void setText(StringBuffer s) {
            text = s;
            final int length = s.length();
            prepareAppliedRules(length);

            // The rules work on a separate copy of the text.
            final StringBuilder remapped = new StringBuilder(s.toString());

            // One context per position, including the position after the last char.
            resolved = new BreakContext[length + 1];
            for (int pos = 0; pos < resolved.length; ++pos) {
                resolved[pos] = new BreakContext(pos);
            }

            for (final SegmentationRule rule : rules) {
                rule.apply(remapped, resolved);
            }

            for (int pos = 0; pos < resolved.length; ++pos) {
                final boolean insideSurrogatePair =
                        pos > 0
                        && pos < length
                        && UTF16.isLeadSurrogate(s.charAt(pos - 1))
                        && UTF16.isTrailSurrogate(s.charAt(pos));
                if (!insideSurrogatePair) {
                    final BreakContext context = resolved[pos];
                    if (context.appliedRule == null) {
                        throw new IllegalArgumentException("Failed to resolve at " + pos);
                    }
                    setAppliedRule(pos, context.appliedRule.name());
                }
            }
        }

        // Return the first break position strictly after startPos,
        // or -1 when there is none before the end of the text.
        int   next(int startPos) {
            int candidate = startPos + 1;
            while (candidate < resolved.length) {
                final BreakContext context = resolved[candidate];
                if (context.appliedRule != null
                        && context.appliedRule.resolution() == Resolution.BREAK) {
                    return candidate;
                }
                ++candidate;
            }
            return -1;
        }

        // Names of the character classes, parallel with charClasses().
        // Used for debugging output of characters.
        List<String> characterClassNames() {
            return classNames;
        }

        UnicodeSet getDictionarySet() {
            return dictionarySet;
        }

        // Record the name of the rule applied at the given position.
        void setAppliedRule(int position, String value) {
            fAppliedRules.set(position, value);
        }

        // Name of the rule recorded for the given position.
        String getAppliedRule(int position) {
            return fAppliedRules.get(position);
        }

        // Name of the first character class containing c, or "bad class name"
        // when no class contains it.
        String classNameFromCodepoint(int c) {
            final List<UnicodeSet> classes = charClasses();
            for (int index = 0; index < classes.size(); index++) {
                if (classes.get(index).contains(c)) {
                    return classNames.get(index);
                }
            }
            return "bad class name";
        }

        // Length of the longest character class name (0 when there are no classes).
        int maxClassNameSize() {
            final int classCount = charClasses().size();
            int longest = 0;
            for (int index = 0; index < classCount; index++) {
                longest = Math.max(longest, classNames.get(index).length());
            }
            return longest;
        }

        // Reset the applied-rule names to size + 1 empty strings,
        // one per position of a text of the given size.
        void prepareAppliedRules(int size) {
            final int needed = size + 1;
            fAppliedRules.clear();
            fAppliedRules.ensureCapacity(needed);
            for (int slot = 0; slot < needed; ++slot) {
                fAppliedRules.add("");
            }
        }
    }

    /**
     * Monkey test subclass for testing Character (Grapheme Cluster) boundaries.
     *
     * The constructor fills in the state inherited from RBBIMonkeyKind: the
     * character property, the list of segmentation rules, and the parallel
     * lists of character classes and their names.
     */
    static class RBBICharMonkey extends RBBIMonkeyKind {
        RBBICharMonkey() {
            fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
            // Character classes are gathered as (name, set) pairs and split into the
            // inherited `sets` / `classNames` lists at the end of this constructor.
            List<NamedSet> partition = new ArrayList<>();
            // RBBIMonkeyKind.setText() applies the rules in list order, so the order of
            // the rules.add() calls below is presumably significant; do not rearrange.
            // NOTE(review): the RegexRule arguments look like (rule name, regex for the
            // text before the boundary, resolution, regex for the text after the
            // boundary) -- confirm against RegexRule.
            rules = new ArrayList<>();

            // These two could be part of the rules.
            rules.add(new RegexRule("GB1 sot ÷ Any", "^", Resolution.BREAK, ""));
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("GB2 Any ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // Class names in the generated partition appear to mirror the set
            // expressions: `A_B` is built with `&` and `AmB` with `-`.
            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Grapheme_Cluster_Break=CR}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Grapheme_Cluster_Break=LF}]")));
            partition.add(new NamedSet("Control", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Control}]")));
            partition.add(new NamedSet("Extend_ConjunctLinker", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Extend}&\\p{Indic_Conjunct_Break=Linker}]")));
            partition.add(new NamedSet("Extend_ConjunctExtendermConjunctLinker", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Extend}&[\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]-\\p{Indic_Conjunct_Break=Linker}]")));
            partition.add(new NamedSet("ExtendmConjunctLinkermConjunctExtender", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Extend}-\\p{Indic_Conjunct_Break=Linker}-[\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]]")));
            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Grapheme_Cluster_Break=ZWJ}]")));
            partition.add(new NamedSet("RI", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Regional_Indicator}]")));
            partition.add(new NamedSet("Prepend", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Prepend}]")));
            partition.add(new NamedSet("SpacingMark", new UnicodeSet("[\\p{Grapheme_Cluster_Break=SpacingMark}]")));
            partition.add(new NamedSet("L", new UnicodeSet("[\\p{Grapheme_Cluster_Break=L}]")));
            partition.add(new NamedSet("V", new UnicodeSet("[\\p{Grapheme_Cluster_Break=V}]")));
            partition.add(new NamedSet("T", new UnicodeSet("[\\p{Grapheme_Cluster_Break=T}]")));
            partition.add(new NamedSet("LV", new UnicodeSet("[\\p{Grapheme_Cluster_Break=LV}]")));
            partition.add(new NamedSet("LVT", new UnicodeSet("[\\p{Grapheme_Cluster_Break=LVT}]")));
            partition.add(new NamedSet("LinkingConsonant", new UnicodeSet("[\\p{Indic_Conjunct_Break=Consonant}]")));
            partition.add(new NamedSet("ExtPict", new UnicodeSet("[\\p{Extended_Pictographic}]")));
            partition.add(new NamedSet("XXmLinkingConsonantmExtPict", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Other}-\\p{Indic_Conjunct_Break=Consonant}-\\p{Extended_Pictographic}]")));

            rules.add(new RegexRule("$CR × $LF", "\\p{Grapheme_Cluster_Break=CR}", Resolution.NO_BREAK, "\\p{Grapheme_Cluster_Break=LF}"));
            rules.add(new RegexRule("( $Control | $CR | $LF ) ÷", "( \\p{Grapheme_Cluster_Break=Control} | \\p{Grapheme_Cluster_Break=CR} | \\p{Grapheme_Cluster_Break=LF} )", Resolution.BREAK, ""));
            rules.add(new RegexRule("÷ ( $Control | $CR | $LF )", "", Resolution.BREAK, "( \\p{Grapheme_Cluster_Break=Control} | \\p{Grapheme_Cluster_Break=CR} | \\p{Grapheme_Cluster_Break=LF} )"));
            rules.add(new RegexRule("$L × ( $L | $V | $LV | $LVT )", "\\p{Grapheme_Cluster_Break=L}", Resolution.NO_BREAK, "( \\p{Grapheme_Cluster_Break=L} | \\p{Grapheme_Cluster_Break=V} | \\p{Grapheme_Cluster_Break=LV} | \\p{Grapheme_Cluster_Break=LVT} )"));
            rules.add(new RegexRule("( $LV | $V ) × ( $V | $T )", "( \\p{Grapheme_Cluster_Break=LV} | \\p{Grapheme_Cluster_Break=V} )", Resolution.NO_BREAK, "( \\p{Grapheme_Cluster_Break=V} | \\p{Grapheme_Cluster_Break=T} )"));
            rules.add(new RegexRule("( $LVT | $T) × $T", "( \\p{Grapheme_Cluster_Break=LVT} | \\p{Grapheme_Cluster_Break=T})", Resolution.NO_BREAK, "\\p{Grapheme_Cluster_Break=T}"));
            rules.add(new RegexRule("× ($Extend | $ZWJ)", "", Resolution.NO_BREAK, "(\\p{Grapheme_Cluster_Break=Extend} | \\p{Grapheme_Cluster_Break=ZWJ})"));
            rules.add(new RegexRule("× $SpacingMark", "", Resolution.NO_BREAK, "\\p{Grapheme_Cluster_Break=SpacingMark}"));
            rules.add(new RegexRule("$Prepend ×", "\\p{Grapheme_Cluster_Break=Prepend}", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("$LinkingConsonant $ConjunctExtender* $ConjunctLinker $ConjunctExtender* × $LinkingConsonant", "\\p{Indic_Conjunct_Break=Consonant} [\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]* \\p{Indic_Conjunct_Break=Linker} [\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]*", Resolution.NO_BREAK, "\\p{Indic_Conjunct_Break=Consonant}"));
            rules.add(new RegexRule("$ExtPict $Extend* $ZWJ × $ExtPict", "\\p{Extended_Pictographic} \\p{Grapheme_Cluster_Break=Extend}* \\p{Grapheme_Cluster_Break=ZWJ}", Resolution.NO_BREAK, "\\p{Extended_Pictographic}"));
            rules.add(new RegexRule("^ ($RI $RI)* $RI × $RI", "^ (\\p{Grapheme_Cluster_Break=Regional_Indicator} \\p{Grapheme_Cluster_Break=Regional_Indicator})* \\p{Grapheme_Cluster_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Grapheme_Cluster_Break=Regional_Indicator}"));
            rules.add(new RegexRule("[^$RI] ($RI $RI)* $RI × $RI", "[^\\p{Grapheme_Cluster_Break=Regional_Indicator}] (\\p{Grapheme_Cluster_Break=Regional_Indicator} \\p{Grapheme_Cluster_Break=Regional_Indicator})* \\p{Grapheme_Cluster_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Grapheme_Cluster_Break=Regional_Indicator}"));
            // --- End of generated code. ---

            // Final rule, with empty patterns on both sides and a BREAK resolution.
            // NOTE(review): presumably the catch-all for positions no earlier rule
            // resolved -- confirm against RegexRule.apply().
            // TODO(egg): This could just as well be part of the rules…
            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));

            // Publish the partition through the inherited parallel lists, keeping
            // each set and its name at the same index.
            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }

    /**
     * Monkey test subclass for testing Word boundaries.
     *
     * The constructor fills in the state inherited from RBBIMonkeyKind: the
     * character property, the dictionary set, the list of segmentation rules,
     * and the parallel lists of character classes and their names.
     */
    static class RBBIWordMonkey extends RBBIMonkeyKind {
        RBBIWordMonkey() {
            fCharProperty    = UProperty.WORD_BREAK;
            // Replace the empty dictionary set installed by the base class constructor:
            // U+AC00..U+D7A3, Han and Hiragana, plus Word_Break=Katakana and
            // LineBreak=Complex_Context characters.
            dictionarySet = new UnicodeSet("[[\uac00-\ud7a3][:Han:][:Hiragana:]]");
            dictionarySet.addAll(new UnicodeSet("[\\p{Word_Break = Katakana}]"));
            dictionarySet.addAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
            // Character classes are gathered as (name, set) pairs and split into the
            // inherited `sets` / `classNames` lists at the end of this constructor.
            List<NamedSet> partition = new ArrayList<>();
            // RBBIMonkeyKind.setText() applies the rules in list order, so the order of
            // the rules.add() calls below is presumably significant; do not rearrange.
            // NOTE(review): the RegexRule arguments look like (rule name, regex for the
            // text before the boundary, resolution, regex for the text after the
            // boundary) and the RemapRule arguments like (rule name, regex, replacement)
            // -- confirm against those classes.
            rules = new ArrayList<>();

            // These two could be part of the rules.
            rules.add(new RegexRule("WB1 sot ÷ Any", "^", Resolution.BREAK, ""));
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("WB2 Any ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // Class names in the generated partition appear to mirror the set
            // expressions: `A_B` is built with `&` and `AmB` with `-`.
            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Word_Break=CR}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Word_Break=LF}]")));
            partition.add(new NamedSet("Newline", new UnicodeSet("[\\p{Word_Break=Newline}]")));
            partition.add(new NamedSet("Extend", new UnicodeSet("[\\p{Word_Break=Extend}]")));
            partition.add(new NamedSet("Format", new UnicodeSet("[[\\p{Word_Break=Format}]]")));
            partition.add(new NamedSet("Katakana", new UnicodeSet("[\\p{Word_Break=Katakana}]")));
            partition.add(new NamedSet("ALetter_ExtPict", new UnicodeSet("[\\p{Word_Break=ALetter}&\\p{Extended_Pictographic}]")));
            partition.add(new NamedSet("ALettermExtPict", new UnicodeSet("[\\p{Word_Break=ALetter}-\\p{Extended_Pictographic}]")));
            partition.add(new NamedSet("MidLetter", new UnicodeSet("[\\p{Word_Break=MidLetter}]")));
            partition.add(new NamedSet("MidNum", new UnicodeSet("[\\p{Word_Break=MidNum}]")));
            partition.add(new NamedSet("MidNumLet", new UnicodeSet("[\\p{Word_Break=MidNumLet}]")));
            partition.add(new NamedSet("Numeric", new UnicodeSet("[\\p{Word_Break=Numeric}]")));
            partition.add(new NamedSet("ExtendNumLet", new UnicodeSet("[\\p{Word_Break=ExtendNumLet}]")));
            partition.add(new NamedSet("RI", new UnicodeSet("[\\p{Word_Break=Regional_Indicator}]")));
            partition.add(new NamedSet("Hebrew_Letter", new UnicodeSet("[\\p{Word_Break=Hebrew_Letter}]")));
            partition.add(new NamedSet("Double_Quote", new UnicodeSet("[\\p{Word_Break=Double_Quote}]")));
            partition.add(new NamedSet("Single_Quote", new UnicodeSet("[\\p{Word_Break=Single_Quote}]")));
            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Word_Break=ZWJ}]")));
            partition.add(new NamedSet("ExtPictmALetter", new UnicodeSet("[\\p{Extended_Pictographic}-\\p{Word_Break=ALetter}]")));
            partition.add(new NamedSet("WSegSpace", new UnicodeSet("[\\p{Word_Break=WSegSpace}]")));
            partition.add(new NamedSet("XXmExtPict", new UnicodeSet("[\\p{Word_Break=Other}-\\p{Extended_Pictographic}]")));

            rules.add(new RegexRule("$CR × $LF", "\\p{Word_Break=CR}", Resolution.NO_BREAK, "\\p{Word_Break=LF}"));
            rules.add(new RegexRule("($Newline | $CR | $LF) ÷", "(\\p{Word_Break=Newline} | \\p{Word_Break=CR} | \\p{Word_Break=LF})", Resolution.BREAK, ""));
            rules.add(new RegexRule("÷ ($Newline | $CR | $LF)", "", Resolution.BREAK, "(\\p{Word_Break=Newline} | \\p{Word_Break=CR} | \\p{Word_Break=LF})"));
            rules.add(new RegexRule("$ZWJ × $ExtPict", "\\p{Word_Break=ZWJ}", Resolution.NO_BREAK, "\\p{Extended_Pictographic}"));
            rules.add(new RegexRule("$WSegSpace × $WSegSpace", "\\p{Word_Break=WSegSpace}", Resolution.NO_BREAK, "\\p{Word_Break=WSegSpace}"));
            rules.add(new RemapRule("(?<X>[^$CR $LF $Newline]) ($Extend | $Format | $ZWJ)* → ${X}", "(?<X>[^\\p{Word_Break=CR} \\p{Word_Break=LF} \\p{Word_Break=Newline}]) (\\p{Word_Break=Extend} | [\\p{Word_Break=Format}] | \\p{Word_Break=ZWJ})*", "${X}"));
            rules.add(new RegexRule("$AHLetter × $AHLetter", "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]", Resolution.NO_BREAK, "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(new RegexRule("$AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter", "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]", Resolution.NO_BREAK, "(\\p{Word_Break=MidLetter} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}]) [\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(new RegexRule("$AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter", "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}] (\\p{Word_Break=MidLetter} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}])", Resolution.NO_BREAK, "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(new RegexRule("$Hebrew_Letter × $Single_Quote", "\\p{Word_Break=Hebrew_Letter}", Resolution.NO_BREAK, "\\p{Word_Break=Single_Quote}"));
            rules.add(new RegexRule("$Hebrew_Letter × $Double_Quote $Hebrew_Letter", "\\p{Word_Break=Hebrew_Letter}", Resolution.NO_BREAK, "\\p{Word_Break=Double_Quote} \\p{Word_Break=Hebrew_Letter}"));
            rules.add(new RegexRule("$Hebrew_Letter $Double_Quote × $Hebrew_Letter", "\\p{Word_Break=Hebrew_Letter} \\p{Word_Break=Double_Quote}", Resolution.NO_BREAK, "\\p{Word_Break=Hebrew_Letter}"));
            rules.add(new RegexRule("$Numeric × $Numeric", "\\p{Word_Break=Numeric}", Resolution.NO_BREAK, "\\p{Word_Break=Numeric}"));
            rules.add(new RegexRule("$AHLetter × $Numeric", "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]", Resolution.NO_BREAK, "\\p{Word_Break=Numeric}"));
            rules.add(new RegexRule("$Numeric × $AHLetter", "\\p{Word_Break=Numeric}", Resolution.NO_BREAK, "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(new RegexRule("$Numeric ($MidNum | $MidNumLetQ) × $Numeric", "\\p{Word_Break=Numeric} (\\p{Word_Break=MidNum} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}])", Resolution.NO_BREAK, "\\p{Word_Break=Numeric}"));
            rules.add(new RegexRule("$Numeric × ($MidNum | $MidNumLetQ) $Numeric", "\\p{Word_Break=Numeric}", Resolution.NO_BREAK, "(\\p{Word_Break=MidNum} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}]) \\p{Word_Break=Numeric}"));
            rules.add(new RegexRule("$Katakana × $Katakana", "\\p{Word_Break=Katakana}", Resolution.NO_BREAK, "\\p{Word_Break=Katakana}"));
            rules.add(new RegexRule("($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet", "([\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}] | \\p{Word_Break=Numeric} | \\p{Word_Break=Katakana} | \\p{Word_Break=ExtendNumLet})", Resolution.NO_BREAK, "\\p{Word_Break=ExtendNumLet}"));
            rules.add(new RegexRule("$ExtendNumLet × ($AHLetter | $Numeric | $Katakana)", "\\p{Word_Break=ExtendNumLet}", Resolution.NO_BREAK, "([\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}] | \\p{Word_Break=Numeric} | \\p{Word_Break=Katakana})"));
            rules.add(new RegexRule("^ ($RI $RI)* $RI × $RI", "^ (\\p{Word_Break=Regional_Indicator} \\p{Word_Break=Regional_Indicator})* \\p{Word_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Word_Break=Regional_Indicator}"));
            rules.add(new RegexRule("[^$RI] ($RI $RI)* $RI × $RI", "[^\\p{Word_Break=Regional_Indicator}] (\\p{Word_Break=Regional_Indicator} \\p{Word_Break=Regional_Indicator})* \\p{Word_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Word_Break=Regional_Indicator}"));
            // --- End of generated code. ---

            // Final rule, with empty patterns on both sides and a BREAK resolution.
            // NOTE(review): presumably the catch-all for positions no earlier rule
            // resolved -- confirm against RegexRule.apply().
            // TODO(egg): This could just as well be part of the rules…
            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));

            // Publish the partition through the inherited parallel lists, keeping
            // each set and its name at the same index.
            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }


    static class RBBILineMonkey extends RBBIMonkeyKind {
        RBBILineMonkey()
        {
            fCharProperty  = UProperty.LINE_BREAK;
            
            dictionarySet = new UnicodeSet("\\p{lb=SA}");

            List<NamedSet> partition = new ArrayList<>();
            rules = new ArrayList<>();

            rules.add(new RegexRule("sot ÷ contra LB2", "^", Resolution.BREAK, ""));
            // This one could be part of the rules.
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("LB3 ÷ eot", "", Resolution.BREAK, "(?!.)"));


            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("AI_EastAsian", new UnicodeSet("[\\p{Line_Break=Ambiguous}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("AImEastAsian", new UnicodeSet("[\\p{Line_Break=Ambiguous}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("AK", new UnicodeSet("[\\p{Line_Break=Aksara}]")));
            partition.add(new NamedSet("ALorig_EastAsian", new UnicodeSet("[\\p{Line_Break=Alphabetic}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("ALorig_DottedCircle", new UnicodeSet("[\\p{Line_Break=Alphabetic}&[◌]]")));
            partition.add(new NamedSet("ALorigmEastAsianmDottedCircle", new UnicodeSet("[\\p{Line_Break=Alphabetic}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[◌]]")));
            partition.add(new NamedSet("AP", new UnicodeSet("[\\p{Line_Break=Aksara_Prebase}]")));
            partition.add(new NamedSet("AS", new UnicodeSet("[\\p{Line_Break=Aksara_Start}]")));
            partition.add(new NamedSet("B2", new UnicodeSet("[\\p{Line_Break=Break_Both}]")));
            partition.add(new NamedSet("BA_EastAsian", new UnicodeSet("[\\p{Line_Break=Break_After}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("BAmEastAsian", new UnicodeSet("[\\p{Line_Break=Break_After}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("BB", new UnicodeSet("[\\p{Line_Break=Break_Before}]")));
            partition.add(new NamedSet("BK", new UnicodeSet("[\\p{Line_Break=Mandatory_Break}]")));
            partition.add(new NamedSet("CB", new UnicodeSet("[\\p{Line_Break=Contingent_Break}]")));
            partition.add(new NamedSet("CL_EastAsian", new UnicodeSet("[\\p{Line_Break=Close_Punctuation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("CLmEastAsian", new UnicodeSet("[\\p{Line_Break=Close_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("CP", new UnicodeSet("[\\p{Line_Break=CP}]")));
            partition.add(new NamedSet("CMorig_EastAsian", new UnicodeSet("[\\p{Line_Break=Combining_Mark}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("CMorigmEastAsian", new UnicodeSet("[\\p{Line_Break=Combining_Mark}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Line_Break=Carriage_Return}]")));
            partition.add(new NamedSet("EX_EastAsian", new UnicodeSet("[\\p{Line_Break=Exclamation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("EXmEastAsian", new UnicodeSet("[\\p{Line_Break=Exclamation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("GL_EastAsian", new UnicodeSet("[\\p{Line_Break=Glue}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("GLmEastAsian", new UnicodeSet("[\\p{Line_Break=Glue}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("H2", new UnicodeSet("[\\p{Line_Break=H2}]")));
            partition.add(new NamedSet("H3", new UnicodeSet("[\\p{Line_Break=H3}]")));
            partition.add(new NamedSet("HH", new UnicodeSet("[\\p{Line_Break=Unambiguous_Hyphen}]")));
            partition.add(new NamedSet("HL", new UnicodeSet("[\\p{Line_Break=HL}]")));
            partition.add(new NamedSet("HY", new UnicodeSet("[\\p{Line_Break=Hyphen}]")));
            partition.add(new NamedSet("ID_EastAsian", new UnicodeSet("[\\p{Line_Break=Ideographic}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("ID_ExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Ideographic}&[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(new NamedSet("IDmEastAsianmExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Ideographic}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(new NamedSet("IN_EastAsian", new UnicodeSet("[\\p{Line_Break=Inseparable}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("INmEastAsian", new UnicodeSet("[\\p{Line_Break=Inseparable}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("IS", new UnicodeSet("[\\p{Line_Break=Infix_Numeric}]")));
            partition.add(new NamedSet("JL", new UnicodeSet("[\\p{Line_Break=JL}]")));
            partition.add(new NamedSet("JT", new UnicodeSet("[\\p{Line_Break=JT}]")));
            partition.add(new NamedSet("JV", new UnicodeSet("[\\p{Line_Break=JV}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Line_Break=Line_Feed}]")));
            partition.add(new NamedSet("NL", new UnicodeSet("[\\p{Line_Break=Next_Line}]")));
            partition.add(new NamedSet("NSorig_EastAsian", new UnicodeSet("[\\p{Line_Break=Nonstarter}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("NSorigmEastAsian", new UnicodeSet("[\\p{Line_Break=Nonstarter}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("NU", new UnicodeSet("[\\p{Line_Break=Numeric}]")));
            partition.add(new NamedSet("OP_EastAsian", new UnicodeSet("[\\p{Line_Break=Open_Punctuation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("OPmEastAsian", new UnicodeSet("[\\p{Line_Break=Open_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("PO_EastAsian", new UnicodeSet("[\\p{Line_Break=Postfix_Numeric}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("POmEastAsian", new UnicodeSet("[\\p{Line_Break=Postfix_Numeric}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("PR_EastAsian", new UnicodeSet("[\\p{Line_Break=Prefix_Numeric}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("PRmEastAsian", new UnicodeSet("[\\p{Line_Break=Prefix_Numeric}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("QU_Pi", new UnicodeSet("[\\p{Line_Break=Quotation}&\\p{gc=Pi}]")));
            partition.add(new NamedSet("QU_Pf", new UnicodeSet("[\\p{Line_Break=Quotation}&\\p{gc=Pf}]")));
            partition.add(new NamedSet("QUmPimPf", new UnicodeSet("[\\p{Line_Break=Quotation}-\\p{gc=Pi}-\\p{gc=Pf}]")));
            partition.add(new NamedSet("SA_Mn", new UnicodeSet("[[\\p{Line_Break=Complex_Context}&\\p{gc=Mn}]]")));
            partition.add(new NamedSet("SA_Mc", new UnicodeSet("[[\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]]")));
            partition.add(new NamedSet("SAmMnmMc", new UnicodeSet("[[\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]]")));
            partition.add(new NamedSet("SG", new UnicodeSet("[\\p{Line_Break=Surrogate}]")));
            partition.add(new NamedSet("SP", new UnicodeSet("[\\p{Line_Break=Space}]")));
            partition.add(new NamedSet("SY", new UnicodeSet("[\\p{Line_Break=Break_Symbols}]")));
            partition.add(new NamedSet("VF", new UnicodeSet("[\\p{Line_Break=Virama_Final}]")));
            partition.add(new NamedSet("VI", new UnicodeSet("[\\p{Line_Break=Virama}]")));
            partition.add(new NamedSet("WJ", new UnicodeSet("[\\p{Line_Break=Word_Joiner}]")));
            partition.add(new NamedSet("XX_ExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Unknown}&[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(new NamedSet("XXmExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Unknown}-[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(new NamedSet("ZW", new UnicodeSet("[\\p{Line_Break=ZWSpace}]")));
            partition.add(new NamedSet("CJ", new UnicodeSet("[\\p{Line_Break=Conditional_Japanese_Starter}]")));
            partition.add(new NamedSet("RI", new UnicodeSet("[\\p{Line_Break=Regional_Indicator}]")));
            partition.add(new NamedSet("EB_EastAsian", new UnicodeSet("[\\p{Line_Break=E_Base}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("EBmEastAsian", new UnicodeSet("[\\p{Line_Break=E_Base}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("EM", new UnicodeSet("[\\p{Line_Break=E_Modifier}]")));
            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Line_Break=ZWJ}]")));

            rules.add(new RegexRule("$BK ÷", "\\p{Line_Break=Mandatory_Break}", Resolution.BREAK, ""));
            rules.add(new RegexRule("$CR × $LF", "\\p{Line_Break=Carriage_Return}", Resolution.NO_BREAK, "\\p{Line_Break=Line_Feed}"));
            rules.add(new RegexRule("$CR ÷", "\\p{Line_Break=Carriage_Return}", Resolution.BREAK, ""));
            rules.add(new RegexRule("$LF ÷", "\\p{Line_Break=Line_Feed}", Resolution.BREAK, ""));
            rules.add(new RegexRule("$NL ÷", "\\p{Line_Break=Next_Line}", Resolution.BREAK, ""));
            rules.add(new RegexRule("× ( $BK | $CR | $LF | $NL )", "", Resolution.NO_BREAK, "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} )"));
            rules.add(new RegexRule("× $SP", "", Resolution.NO_BREAK, "\\p{Line_Break=Space}"));
            rules.add(new RegexRule("× $ZW", "", Resolution.NO_BREAK, "\\p{Line_Break=ZWSpace}"));
            rules.add(new RegexRule("$ZW $SP* ÷", "\\p{Line_Break=ZWSpace} \\p{Line_Break=Space}*", Resolution.BREAK, ""));
            rules.add(new RegexRule("$ZWJ ×", "\\p{Line_Break=ZWJ}", Resolution.NO_BREAK, ""));
            rules.add(new RemapRule("(?<X>[^$BK $CR $LF $NL $SP $ZW]) ( $CM | $ZWJ )* → ${X}", "(?<X>[^\\p{Line_Break=Mandatory_Break} \\p{Line_Break=Carriage_Return} \\p{Line_Break=Line_Feed} \\p{Line_Break=Next_Line} \\p{Line_Break=Space} \\p{Line_Break=ZWSpace}]) ( [\\p{Line_Break=Combining_Mark} [\\p{Line_Break=Complex_Context}&\\p{gc=Mn}] [\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]] | \\p{Line_Break=ZWJ} )*", "${X}"));
            rules.add(new RemapRule("( $CM | $ZWJ ) → A", "( [\\p{Line_Break=Combining_Mark} [\\p{Line_Break=Complex_Context}&\\p{gc=Mn}] [\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]] | \\p{Line_Break=ZWJ} )", "A"));
            rules.add(new RegexRule("× $WJ", "", Resolution.NO_BREAK, "\\p{Line_Break=Word_Joiner}"));
            rules.add(new RegexRule("$WJ ×", "\\p{Line_Break=Word_Joiner}", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("$GL ×", "\\p{Line_Break=Glue}", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("[^ $SP $BA $HY $HH] × $GL", "[^ \\p{Line_Break=Space} \\p{Line_Break=Break_After} \\p{Line_Break=Hyphen} \\p{Line_Break=Unambiguous_Hyphen}]", Resolution.NO_BREAK, "\\p{Line_Break=Glue}"));
            rules.add(new RegexRule("× $EX", "", Resolution.NO_BREAK, "\\p{Line_Break=Exclamation}"));
            rules.add(new RegexRule("× $CL", "", Resolution.NO_BREAK, "\\p{Line_Break=Close_Punctuation}"));
            rules.add(new RegexRule("× $CP", "", Resolution.NO_BREAK, "\\p{Line_Break=CP}"));
            rules.add(new RegexRule("× $SY", "", Resolution.NO_BREAK, "\\p{Line_Break=Break_Symbols}"));
            rules.add(new RegexRule("$OP $SP* ×", "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Space}*", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("( $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW | $sot ) $QU_Pi $SP* ×", "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=Open_Punctuation} | \\p{Line_Break=Quotation} | \\p{Line_Break=Glue} | \\p{Line_Break=Space} | \\p{Line_Break=ZWSpace} | ^ ) [\\p{Line_Break=Quotation} & \\p{gc=Pi}] \\p{Line_Break=Space}*", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("× $QU_Pf ( $SP | $GL | $WJ | $CL | $QU | $CP | $EX | $IS | $SY | $BK | $CR | $LF | $NL | $ZW | $eot )", "", Resolution.NO_BREAK, "[\\p{Line_Break=Quotation} & \\p{gc=Pf}] ( \\p{Line_Break=Space} | \\p{Line_Break=Glue} | \\p{Line_Break=Word_Joiner} | \\p{Line_Break=Close_Punctuation} | \\p{Line_Break=Quotation} | \\p{Line_Break=CP} | \\p{Line_Break=Exclamation} | \\p{Line_Break=Infix_Numeric} | \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=ZWSpace} | (?!.) )"));
            rules.add(new RegexRule("$SP ÷ $IS $NU", "\\p{Line_Break=Space}", Resolution.BREAK, "\\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("× $IS", "", Resolution.NO_BREAK, "\\p{Line_Break=Infix_Numeric}"));
            rules.add(new RegexRule("($CL | $CP) $SP* × $NS", "(\\p{Line_Break=Close_Punctuation} | \\p{Line_Break=CP}) \\p{Line_Break=Space}*", Resolution.NO_BREAK, "[\\p{Line_Break=Nonstarter} \\p{Line_Break=Conditional_Japanese_Starter}]"));
            rules.add(new RegexRule("$B2 $SP* × $B2", "\\p{Line_Break=Break_Both} \\p{Line_Break=Space}*", Resolution.NO_BREAK, "\\p{Line_Break=Break_Both}"));
            rules.add(new RegexRule("$SP ÷", "\\p{Line_Break=Space}", Resolution.BREAK, ""));
            rules.add(new RegexRule("× $QUmPi", "", Resolution.NO_BREAK, "[\\p{Line_Break=Quotation} - \\p{gc=Pi}]"));
            rules.add(new RegexRule("$QUmPf ×", "[\\p{Line_Break=Quotation} - \\p{gc=Pf}]", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("[^$EastAsian] × $QU", "[^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]", Resolution.NO_BREAK, "\\p{Line_Break=Quotation}"));
            rules.add(new RegexRule("× $QU ( [^$EastAsian] | $eot )", "", Resolution.NO_BREAK, "\\p{Line_Break=Quotation} ( [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]] | (?!.) )"));
            rules.add(new RegexRule("$QU × [^$EastAsian]", "\\p{Line_Break=Quotation}", Resolution.NO_BREAK, "[^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"));
            rules.add(new RegexRule("( [^$EastAsian] | $sot ) $QU ×", "( [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]] | ^ ) \\p{Line_Break=Quotation}", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("÷ $CB", "", Resolution.BREAK, "\\p{Line_Break=Contingent_Break}"));
            rules.add(new RegexRule("$CB ÷", "\\p{Line_Break=Contingent_Break}", Resolution.BREAK, ""));
            rules.add(new RegexRule("( $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL | $sot ) ( $HY | $HH ) × ( $AL | $HL )", "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=Space} | \\p{Line_Break=ZWSpace} | \\p{Line_Break=Contingent_Break} | \\p{Line_Break=Glue} | ^ ) ( \\p{Line_Break=Hyphen} | \\p{Line_Break=Unambiguous_Hyphen} )", Resolution.NO_BREAK, "( [\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} )"));
            rules.add(new RegexRule("× $BA", "", Resolution.NO_BREAK, "\\p{Line_Break=Break_After}"));
            rules.add(new RegexRule("× $HH", "", Resolution.NO_BREAK, "\\p{Line_Break=Unambiguous_Hyphen}"));
            rules.add(new RegexRule("× $HY", "", Resolution.NO_BREAK, "\\p{Line_Break=Hyphen}"));
            rules.add(new RegexRule("× $NS", "", Resolution.NO_BREAK, "[\\p{Line_Break=Nonstarter} \\p{Line_Break=Conditional_Japanese_Starter}]"));
            rules.add(new RegexRule("$BB ×", "\\p{Line_Break=Break_Before}", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("$HL ($HY | $HH) × [^$HL]", "\\p{Line_Break=HL} (\\p{Line_Break=Hyphen} | \\p{Line_Break=Unambiguous_Hyphen})", Resolution.NO_BREAK, "[^\\p{Line_Break=HL}]"));
            rules.add(new RegexRule("$SY × $HL", "\\p{Line_Break=Break_Symbols}", Resolution.NO_BREAK, "\\p{Line_Break=HL}"));
            rules.add(new RegexRule("× $IN", "", Resolution.NO_BREAK, "\\p{Line_Break=Inseparable}"));
            rules.add(new RegexRule("($AL | $HL) × $NU", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$NU × ($AL | $HL)", "\\p{Line_Break=Numeric}", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(new RegexRule("$PR × ($ID | $EB | $EM)", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "(\\p{Line_Break=Ideographic} | \\p{Line_Break=E_Base} | \\p{Line_Break=E_Modifier})"));
            rules.add(new RegexRule("($ID | $EB | $EM) × $PO", "(\\p{Line_Break=Ideographic} | \\p{Line_Break=E_Base} | \\p{Line_Break=E_Modifier})", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(new RegexRule("($PR | $PO) × ($AL | $HL)", "(\\p{Line_Break=Prefix_Numeric} | \\p{Line_Break=Postfix_Numeric})", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(new RegexRule("($AL | $HL) × ($PR | $PO)", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})", Resolution.NO_BREAK, "(\\p{Line_Break=Prefix_Numeric} | \\p{Line_Break=Postfix_Numeric})"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* $CL × $PO", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=Close_Punctuation}", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* $CP × $PO", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=CP}", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* $CL × $PR", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=Close_Punctuation}", Resolution.NO_BREAK, "\\p{Line_Break=Prefix_Numeric}"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* $CP × $PR", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=CP}", Resolution.NO_BREAK, "\\p{Line_Break=Prefix_Numeric}"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* × $PO", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* × $PR", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*", Resolution.NO_BREAK, "\\p{Line_Break=Prefix_Numeric}"));
            rules.add(new RegexRule("$PO × $OP $NU", "\\p{Line_Break=Postfix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$PO × $OP $IS $NU", "\\p{Line_Break=Postfix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$PO × $NU", "\\p{Line_Break=Postfix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$PR × $OP $NU", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$PR × $OP $IS $NU", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$PR × $NU", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$HY × $NU", "\\p{Line_Break=Hyphen}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$IS × $NU", "\\p{Line_Break=Infix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$NU ( $SY | $IS )* × $NU", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
            rules.add(new RegexRule("$JL × $JL | $JV | $H2 | $H3", "\\p{Line_Break=JL}", Resolution.NO_BREAK, "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=H2} | \\p{Line_Break=H3}"));
            rules.add(new RegexRule("$JV | $H2 × $JV | $JT", "\\p{Line_Break=JV} | \\p{Line_Break=H2}", Resolution.NO_BREAK, "\\p{Line_Break=JV} | \\p{Line_Break=JT}"));
            rules.add(new RegexRule("$JT | $H3 × $JT", "\\p{Line_Break=JT} | \\p{Line_Break=H3}", Resolution.NO_BREAK, "\\p{Line_Break=JT}"));
            rules.add(new RegexRule("$JL | $JV | $JT | $H2 | $H3 × $PO", "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=JT} | \\p{Line_Break=H2} | \\p{Line_Break=H3}", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(new RegexRule("$PR × $JL | $JV | $JT | $H2 | $H3", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=JT} | \\p{Line_Break=H2} | \\p{Line_Break=H3}"));
            rules.add(new RegexRule("($AL | $HL) × ($AL | $HL)", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(new RegexRule("$AP × ($AK | $DottedCircle | $AS)", "\\p{Line_Break=Aksara_Prebase}", Resolution.NO_BREAK, "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})"));
            rules.add(new RegexRule("($AK | $DottedCircle | $AS) × ($VF | $VI)", "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})", Resolution.NO_BREAK, "(\\p{Line_Break=Virama_Final} | \\p{Line_Break=Virama})"));
            rules.add(new RegexRule("($AK | $DottedCircle | $AS) $VI × ($AK | $DottedCircle)", "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start}) \\p{Line_Break=Virama}", Resolution.NO_BREAK, "(\\p{Line_Break=Aksara} | [◌])"));
            rules.add(new RegexRule("($AK | $DottedCircle | $AS) × ($AK | $DottedCircle | $AS) $VF", "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})", Resolution.NO_BREAK, "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start}) \\p{Line_Break=Virama_Final}"));
            rules.add(new RegexRule("$IS × ($AL | $HL)", "\\p{Line_Break=Infix_Numeric}", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(new RegexRule("($AL | $HL | $NU) × $OPmEastAsian", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} | \\p{Line_Break=Numeric})", Resolution.NO_BREAK, "[\\p{Line_Break=Open_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"));
            rules.add(new RegexRule("$CPmEastAsian × ($AL | $HL | $NU)", "[\\p{Line_Break=CP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} | \\p{Line_Break=Numeric})"));
            rules.add(new RegexRule("$sot ($RI $RI)* $RI × $RI", "^ (\\p{Line_Break=Regional_Indicator} \\p{Line_Break=Regional_Indicator})* \\p{Line_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Line_Break=Regional_Indicator}"));
            rules.add(new RegexRule("[^$RI] ($RI $RI)* $RI × $RI", "[^\\p{Line_Break=Regional_Indicator}] (\\p{Line_Break=Regional_Indicator} \\p{Line_Break=Regional_Indicator})* \\p{Line_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Line_Break=Regional_Indicator}"));
            rules.add(new RegexRule("$RI ÷ $RI", "\\p{Line_Break=Regional_Indicator}", Resolution.BREAK, "\\p{Line_Break=Regional_Indicator}"));
            rules.add(new RegexRule("$EB × $EM", "\\p{Line_Break=E_Base}", Resolution.NO_BREAK, "\\p{Line_Break=E_Modifier}"));
            rules.add(new RegexRule("$ExtPictUnassigned × $EM", "[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]", Resolution.NO_BREAK, "\\p{Line_Break=E_Modifier}"));
            // --- End of generated code. ---

            // TODO(egg): This could just as well be part of the rules…
            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));

            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }


    /**
     * Sentence Monkey Test Class.
     *
     * Populates the monkey-test state for sentence breaking: the list of
     * segmentation rules (in the order in which they are added), and the
     * character classes, one per value of the Sentence_Break property, together
     * with their display names.
     */
    static class RBBISentenceMonkey extends RBBIMonkeyKind {

        RBBISentenceMonkey() {
            fCharProperty  = UProperty.SENTENCE_BREAK;
            List<NamedSet> partition = new ArrayList<>();
            rules = new ArrayList<>();

            // These two could be part of the rules.
            // They are the start-of-text and end-of-text rules of the sentence boundary
            // specification, which numbers them SB1 and SB2 (WB1 and WB2 are the
            // corresponding word boundary rules).
            rules.add(new RegexRule("SB1 sot ÷ Any", "^", Resolution.BREAK, ""));
            // Note that /$/ also matches before a line terminator that ends the text, so we
            // use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("SB2 Any ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Sentence_Break=CR}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Sentence_Break=LF}]")));
            partition.add(new NamedSet("Extend", new UnicodeSet("[\\p{Sentence_Break=Extend}]")));
            partition.add(new NamedSet("Format", new UnicodeSet("[\\p{Sentence_Break=Format}]")));
            partition.add(new NamedSet("Sep", new UnicodeSet("[\\p{Sentence_Break=Sep}]")));
            partition.add(new NamedSet("Sp", new UnicodeSet("[\\p{Sentence_Break=Sp}]")));
            partition.add(new NamedSet("Lower", new UnicodeSet("[\\p{Sentence_Break=Lower}]")));
            partition.add(new NamedSet("Upper", new UnicodeSet("[\\p{Sentence_Break=Upper}]")));
            partition.add(new NamedSet("OLetter", new UnicodeSet("[\\p{Sentence_Break=OLetter}]")));
            partition.add(new NamedSet("Numeric", new UnicodeSet("[\\p{Sentence_Break=Numeric}]")));
            partition.add(new NamedSet("ATerm", new UnicodeSet("[\\p{Sentence_Break=ATerm}]")));
            partition.add(new NamedSet("STerm", new UnicodeSet("[\\p{Sentence_Break=STerm}]")));
            partition.add(new NamedSet("Close", new UnicodeSet("[\\p{Sentence_Break=Close}]")));
            partition.add(new NamedSet("SContinue", new UnicodeSet("[\\p{Sentence_Break=SContinue}]")));
            partition.add(new NamedSet("XX", new UnicodeSet("[\\p{Sentence_Break=Other}]")));

            rules.add(new RegexRule("$CR × $LF", "\\p{Sentence_Break=CR}", Resolution.NO_BREAK, "\\p{Sentence_Break=LF}"));
            rules.add(new RegexRule("$ParaSep ÷", "[\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}]", Resolution.BREAK, ""));
            rules.add(new RemapRule("(?<X>[^$ParaSep]) ( $Extend | $Format )* → ${X}", "(?<X>[^[\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}]]) ( \\p{Sentence_Break=Extend} | \\p{Sentence_Break=Format} )*", "${X}"));
            rules.add(new RegexRule("$ATerm × $Numeric", "\\p{Sentence_Break=ATerm}", Resolution.NO_BREAK, "\\p{Sentence_Break=Numeric}"));
            rules.add(new RegexRule("($Upper | $Lower) $ATerm × $Upper", "(\\p{Sentence_Break=Upper} | \\p{Sentence_Break=Lower}) \\p{Sentence_Break=ATerm}", Resolution.NO_BREAK, "\\p{Sentence_Break=Upper}"));
            rules.add(new RegexRule("$ATerm $Close* $Sp* × [^ $OLetter $Upper $Lower $ParaSep $SATerm]* $Lower", "\\p{Sentence_Break=ATerm} \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}*", Resolution.NO_BREAK, "[^ \\p{Sentence_Break=OLetter} \\p{Sentence_Break=Upper} \\p{Sentence_Break=Lower} [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}] [\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}]]* \\p{Sentence_Break=Lower}"));
            rules.add(new RegexRule("$SATerm $Close* $Sp* × ($SContinue | $SATerm)", "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}*", Resolution.NO_BREAK, "(\\p{Sentence_Break=SContinue} | [\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}])"));
            rules.add(new RegexRule("$SATerm $Close* × ( $Close | $Sp | $ParaSep )", "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}*", Resolution.NO_BREAK, "( \\p{Sentence_Break=Close} | \\p{Sentence_Break=Sp} | [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}] )"));
            rules.add(new RegexRule("$SATerm $Close* $Sp* × ( $Sp | $ParaSep )", "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}*", Resolution.NO_BREAK, "( \\p{Sentence_Break=Sp} | [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}] )"));
            rules.add(new RegexRule("$SATerm $Close* $Sp* $ParaSep? ÷", "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}* [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}]?", Resolution.BREAK, ""));
            rules.add(new RegexRule("× $Any", "", Resolution.NO_BREAK, "."));
            // --- End of generated code. ---

            // Publish the partition as parallel lists: the sets themselves and their names.
            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }

    }


    /**
     * Move an index into a string by n code points.
     *   Similar to UTF16.moveCodePointOffset, but without the exceptions, which were
     *   complicating usage.
     *
     *   A well-formed surrogate pair counts as a single code point; an unpaired
     *   surrogate counts as one code point on its own.
     *
     * @param s   a Text string
     * @param pos The starting code unit index into the text string
     * @param amt The number of code points to move by; positive moves forward,
     *            negative moves backward, zero leaves the index untouched.
     * @return    The adjusted code unit index, pinned to the range 0..s.length().
     *            When amt is zero, pos is returned unchanged, even if it lies
     *            outside of the string. Otherwise a starting index outside of the
     *            string is first pinned to the nearest end of the string.
     */
    static int moveIndex32(StringBuffer s, int pos, int amt) {
        if (amt == 0) {
            return pos;
        }
        final int length = s.length();
        // Pin the starting index first, so that an out-of-range pos can never reach
        // s.charAt() and raise the exception that this method promises to avoid.
        pos = Math.max(0, Math.min(pos, length));
        if (amt > 0) {
            for (int moved = 0; moved < amt && pos < length; moved++) {
                final char unit = s.charAt(pos);
                pos++;
                // Step over the trail half as well when this is a complete pair.
                if (Character.isHighSurrogate(unit) && pos < length
                        && Character.isLowSurrogate(s.charAt(pos))) {
                    pos++;
                }
            }
        } else {
            for (int moved = 0; moved > amt && pos > 0; moved--) {
                pos--;
                final char unit = s.charAt(pos);
                // Step back over the lead half as well when this is a complete pair.
                if (Character.isLowSurrogate(unit) && pos > 0
                        && Character.isHighSurrogate(s.charAt(pos - 1))) {
                    pos--;
                }
            }
        }
        return pos;
    }

    /**
     * Range-checked membership test for a UnicodeSet.
     *    Values that are not code points (negative, or above
     *    UTF16.CODEPOINT_MAX_VALUE) are reported as not contained without
     *    consulting the set, so loops may pass an end-of-input marker directly.
     * @param s  A unicode set
     * @param c  A code point value, or an out-of-range marker value
     * @return   true if c is a code point and the set contains it.
     */
    static boolean setContains(UnicodeSet s, int c) {
        final boolean isCodePoint = 0 <= c && c <= UTF16.CODEPOINT_MAX_VALUE;
        // Short-circuit: the set is only queried for genuine code points.
        return isCodePoint && s.contains(c);
    }


    /**
     * Return the index of the code point following the one at index i.
     * @param s the input text
     * @param i the preceding index, or -1 for the end-of-input indication
     * @return  i advanced by one code unit, or by two when a supplementary
     *          code point starts at i; -1 when i is -1 or when the advanced
     *          index would lie beyond the end of the text.
     */
    static int  nextCP(StringBuffer s, int i) {
        // The end-of-input indication is sticky, and nothing follows the end of the text.
        if (i == -1 || i + 1 > s.length()) {
            return -1;
        }
        final int codePoint = UTF16.charAt(s, i);
        // The lead surrogate check keeps the step at one unit when i is not on the
        // first unit of a pair (presumably UTF16.charAt also reports the whole
        // code point from the trail unit; verify against the ICU API).
        final boolean pairStartsHere =
                codePoint >= UTF16.SUPPLEMENTARY_MIN_VALUE && UTF16.isLeadSurrogate(s.charAt(i));
        return pairStartsHere ? i + 2 : i + 1;
    }



        

    /**
     * Random number generator: a linear congruential generator whose entire state is
     * the int m_seed.  Java's built-in Randoms are not used, for two reasons:
     *    1.  Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
     *    2.  We need to get and restore the seed from values occurring in the middle
     *        of a long sequence, to more easily reproduce failing cases.
     * TODO(egg): We need a better random number generator; ideally the same as in C++, but that may
     *            be tricky.
     */
    private static int m_seed = 1;

    /** Advances m_seed by one step and returns a value in the range 0..32767. */
    private static int  m_rand()
    {
        // The int arithmetic is allowed to wrap around.
        final int advanced = 1103515245 * m_seed + 12345;
        m_seed = advanced;
        // The unsigned shift leaves a non-negative value, of which the low 15 bits are kept.
        return (advanced >>> 16) & 0x7fff;
    }

    // Pictographs from which one is picked at random for console output.
    private static final String[] monkeys = {"🙈", "🙉", "🙊", "🐵", "🐒"};

    // Helper function for formatting error output.
    //   Append a string into a fixed-size field in a StringBuffer.
    //   Blank-pad the string if it is shorter than the field.
    //   Truncate the source string if it is too long.
    //   A negative field length is treated as zero, so that a caller which computes
    //   the width by subtraction appends nothing instead of raising an exception.
    //
    //   dest      the buffer to append to
    //   src       the text to place in the field
    //   fieldLen  the width of the field, in UTF-16 code units
    //
    private static void appendToBuf(StringBuffer dest, String src, int fieldLen) {
        final int width = Math.max(fieldLen, 0);
        final int srcLen = src.length();
        if (srcLen >= width) {
            // Too long, or an exact fit: keep only the leading part.
            dest.append(src, 0, width);
        } else {
            dest.append(src);
            for (int pad = width - srcLen; pad > 0; pad--) {
                dest.append(' ');
            }
        }
    }

    // Helper function for formatting error output.
    // Append a code point as "\\u" plus four lower case hex digits when it is below
    // 0x10000, and as "\\U" plus eight lower case hex digits otherwise.  The rest of
    // the field is then filled in by appendToBuf(), which is handed a single blank
    // and the field length less the number of characters already written.
    @SuppressWarnings("unused")
    private static void appendCharToBuf(StringBuffer dest, int c, int fieldLen) {
        final boolean fourDigitForm = c < 0x10000;
        final int digitCount = fourDigitForm ? 4 : 8;
        dest.append(fourDigitForm ? "\\u" : "\\U");
        // Emit the nibbles from the most significant one downwards.
        for (int shift = 4 * (digitCount - 1); shift >= 0; shift -= 4) {
            dest.append(Character.forDigit((c >> shift) & 0xf, 16));
        }
        // Two characters of prefix plus the digits have been used up.
        appendToBuf(dest, " ", fieldLen - (digitCount + 2));
    }

    /**
     *  Run a RBBI monkey test.  Common routine, for all break iterator types.
     *    Parameters:
     *       bi      - the break iterator to use
     *       mk      - MonkeyKind, abstraction for obtaining expected results
     *       name    - Name of test (char, word, etc.) for use in error messages
     *       seed    - Seed for starting random number generator (parameter from user)
     *       numIterations
     */
    void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int numIterations) {
        int              TESTSTRINGLEN = 500;
        StringBuffer     testText         = new StringBuffer();
        int              numCharClasses;
        List<UnicodeSet> chClasses;
        @SuppressWarnings("unused")
        int              expectedCount    = 0;
        boolean[]        expectedBreaks   = new boolean[TESTSTRINGLEN*2 + 1];
        boolean[]        forwardBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
        boolean[]        reverseBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
        boolean[]        isBoundaryBreaks = new boolean[TESTSTRINGLEN*2 + 1];
        boolean[]        followingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
        boolean[]        precedingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
        int              i;
        int              loopCount        = 0;
        int              errorCount       = 0;
        boolean          printTestData    = false;
        boolean          printBreaksFromBI = false;

        m_seed = seed;

        numCharClasses = mk.charClasses().size();
        chClasses      = mk.charClasses();

        // Verify that the character classes all have at least one member.
        for (i=0; i<numCharClasses; i++) {
            UnicodeSet s = (UnicodeSet)chClasses.get(i);
            if (s == null || s.size() == 0) {
                errln("Character Class " + i + " is null or of zero size.");
                return;
            }
        }

        //--------------------------------------------------------------------------------------------
        //
        //  Debugging settings.  Comment out everything in the following block for normal operation
        //
        //--------------------------------------------------------------------------------------------
        // numIterations = -1;
        // numIterations = 10000;   // Same as exhaustive.
        // RuleBasedBreakIterator_New.fTrace = true;
        // m_seed = 859056465;
        // TESTSTRINGLEN = 50;
        // printTestData = true;
        // printBreaksFromBI = true;
        // ((RuleBasedBreakIterator_New)bi).dump();

        //--------------------------------------------------------------------------------------------
        //
        //  End of Debugging settings.
        //
        //--------------------------------------------------------------------------------------------

        // For minimizing width of class name output.
        int classNameSize = mk.maxClassNameSize();
        while (loopCount < numIterations || numIterations == -1) {
            if (numIterations == -1 && loopCount % 10 == 0) {
                // If test is running in an infinite loop, display a periodic tic so
                //   we can tell that it is making progress.
                System.out.print(monkeys[m_rand() % monkeys.length]);
                if (loopCount % 1_000_000 == 0) {
                    System.out.println("\nTested " + loopCount / 1_000_000 + " million random strings with " + errorCount + " errors");
                }
            }
            // Save current random number seed, so that we can recreate the random numbers
            //   for this loop iteration in event of an error.
            seed = m_seed;

            testText.setLength(0);
            // Populate a test string with data.
            if (printTestData) {
                System.out.println("Test Data string ...");
            }
            final boolean java8OrOlder = System.getProperty("java.version").startsWith("1.");
            for (i=0; i<TESTSTRINGLEN; i++) {
                int        aClassNum = m_rand() % numCharClasses;
                UnicodeSet classSet  = (UnicodeSet)chClasses.get(aClassNum);
                int        charIdx   = m_rand() % classSet.size();
                int        c         = classSet.charAt(charIdx);
                if (c < 0) {   // TODO:  deal with sets containing strings.
                    errln("c < 0");
                }
                if (mk.getDictionarySet().contains(c)) {
                    continue;
                }
                // Do not emit surrogates on Java 8, as the behaviour of regular expressions that
                // match surrogates differs there.
                if (java8OrOlder &&
                        Character.isBmpCodePoint(c) &&
                        Character.isSurrogate((char)c)) {
                    continue;
                }
                // Do not assemble a supplementary character from randomly generated separate surrogates.
                //   (It could be a dictionary character)
                if (c < 0x10000 && Character.isLowSurrogate((char)c) && testText.length() > 0 &&
                        Character.isHighSurrogate(testText.charAt(testText.length()-1))) {
                    continue;
                }
                testText.appendCodePoint(c);
                if (printTestData) {
                    System.out.print(Integer.toHexString(c) + " ");
                }
            }
            if (printTestData) {
                System.out.println();
            }

            Arrays.fill(expectedBreaks, false);
            Arrays.fill(forwardBreaks, false);
            Arrays.fill(reverseBreaks, false);
            Arrays.fill(isBoundaryBreaks, false);
            Arrays.fill(followingBreaks, false);
            Arrays.fill(precedingBreaks, false);

            // Calculate the expected results for this test string and reset applied rules.
            mk.setText(testText);
            expectedCount = 0;
            expectedBreaks[0] = true;
            int breakPos = 0;
            int lastBreakPos = -1;
            for (;;) {
                lastBreakPos = breakPos;
                breakPos = mk.next(breakPos);
                if (breakPos == -1) {
                    break;
                }
                if (breakPos > testText.length()) {
                    errln("breakPos > testText.length()");
                }
                if (lastBreakPos >= breakPos) {
                    errln("Next() not increasing.");
                    // break;
                }
                expectedBreaks[breakPos] = true;
            }

            // Find the break positions using forward iteration
            if (printBreaksFromBI) {
                System.out.println("Breaks from BI...");
            }
            bi.setText(testText.toString());
            for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) {
                if (i < 0 || i > testText.length()) {
                    errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
                    break;
                }
                if (printBreaksFromBI) {
                    System.out.print(Integer.toHexString(i) + " ");
                }
                forwardBreaks[i] = true;
            }
            if (printBreaksFromBI) {
                System.out.println();
            }

            // Find the break positions using reverse iteration
            for (i=bi.last(); i != BreakIterator.DONE; i=bi.previous()) {
                if (i < 0 || i > testText.length()) {
                    errln(name + " break monkey test: Out of range value returned by breakIterator.next()" + name);
                    break;
                }
                reverseBreaks[i] = true;
            }

            // Find the break positions using isBoundary() tests.
            for (i=0; i<=testText.length(); i++) {
                isBoundaryBreaks[i] = bi.isBoundary(i);
            }

            // Find the break positions using the following() function.
            lastBreakPos = 0;
            followingBreaks[0] = true;
            for (i=0; i<testText.length(); i++) {
                breakPos = bi.following(i);
                if (breakPos <= i ||
                        breakPos < lastBreakPos ||
                        breakPos > testText.length() ||
                        breakPos > lastBreakPos && lastBreakPos > i ) {
                    errln(name + " break monkey test: " +
                            "Out of range value returned by BreakIterator::following().\n" +
                            "index=" + i + "following returned=" + breakPos +
                            "lastBreak=" + lastBreakPos);
                    precedingBreaks[i] = !expectedBreaks[i];   // Forces an error.
                } else {
                    followingBreaks[breakPos] = true;
                    lastBreakPos = breakPos;
                }
            }

            // Find the break positions using the preceding() function.
            lastBreakPos = testText.length();
            precedingBreaks[testText.length()] = true;
            for (i=testText.length(); i>0; i--) {
                breakPos = bi.preceding(i);
                if (breakPos >= i ||
                        breakPos > lastBreakPos ||
                        breakPos < 0 ||
                        breakPos < lastBreakPos && lastBreakPos < i ) {
                    errln(name + " break monkey test: " +
                            "Out of range value returned by BreakIterator::preceding().\n" +
                            "index=" + i + "preceding returned=" + breakPos +
                            "lastBreak=" + lastBreakPos);
                    precedingBreaks[i] = !expectedBreaks[i];   // Forces an error.
                } else {
                    precedingBreaks[breakPos] = true;
                    lastBreakPos = breakPos;
                }
            }



            // Compare the expected and actual results.
            for (i=0; i<=testText.length(); i++) {
                String errorType = null;
                boolean[] currentBreakData = null;
                if  (forwardBreaks[i] != expectedBreaks[i]) {
                    errorType = "next()";
                    currentBreakData = forwardBreaks;
                } else if (reverseBreaks[i] != forwardBreaks[i]) {
                    errorType = "previous()";
                    currentBreakData = reverseBreaks;
                } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
                    errorType = "isBoundary()";
                    currentBreakData = isBoundaryBreaks;
                } else if (followingBreaks[i] != expectedBreaks[i]) {
                    errorType = "following()";
                    currentBreakData = followingBreaks;
                } else if (precedingBreaks[i] != expectedBreaks[i]) {
                    errorType = "preceding()";
                    currentBreakData = precedingBreaks;
                }

                if (errorType != null) {
                    ++errorCount;
                    // Format a range of the test text that includes the failure as
                    //  a data item that can be included in the rbbi test data file.

                    // Start of the range is the last point where expected and actual results
                    //   both agreed that there was a break position.
                    int startContext = i;
                    int count = 0;
                    for (;;) {
                        if (startContext==0) { break; }
                        startContext --;
                        if (expectedBreaks[startContext]) {
                            if (count == 2) break;
                            count ++;
                        }
                    }

                    // End of range is two expected breaks past the start position.
                    int endContext = i + 1;
                    int ci;
                    for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
                        for (;;) {
                            if (endContext >= testText.length()) {break;}
                            if (expectedBreaks[endContext-1]) {
                                if (count == 0) break;
                                count --;
                            }
                            endContext ++;
                        }
                    }

                    // Formatting of each line includes:
                    //   character code
                    //   reference break: '|' -> a break, '.' -> no break
                    //   actual break:    '|' -> a break, '.' -> no break
                    //   (name of character class)
                    //   Unicode name of character
                    //   '--→' indicates location of the difference.

                    StringBuilder buffer = new StringBuilder();
                    buffer.append("\n")
                        .append((expectedBreaks[i] ? "Break expected but not found." : "Break found but not expected."))
                        .append(
                            String.format(" at index %d. Parameters to reproduce: -Dtest=RBBITestMonkey#Test%sMonkey -Dseed=%d -Dloop=1\n",
                              i, name, seed));

                    int c;  // Char from test data
                    for (ci = startContext;  ci <= endContext && ci != -1;  ci = nextCP(testText, ci)) {
                        if (ci == testText.length()) {
                            break;  // TODO(egg): The index dance above seems wrong.
                        }
                        c = testText.codePointAt(ci);
                        buffer.append((ci == i) ? " --→" : "    ")
                            .append(String.format(" %3d : ", ci))
                            .append(!expectedBreaks[ci] ? " . " : " | ")  // Reference break
                            .append(!currentBreakData[ci] ? " . " : " | "); // Actual break

                        // BMP or SMP character in hex
                        if (c >= 0x10000) {
                            buffer.append("\\U").append(String.format("%08x", c));
                        } else {
                            buffer.append("    \\u").append(String.format("%04x", c));
                        }

                        buffer.append(
                            String.format(String.format(" %%-%ds", classNameSize),
                              mk.classNameFromCodepoint(c)))
                            .append(String.format(" %-40s", mk.getAppliedRule(ci)))
                            .append(String.format(" %-40s\n", UCharacter.getExtendedName(c)));

                        if (ci >= endContext) { break; }
                    }
                    errln(buffer.toString());

                    break;
                }
            }

            loopCount++;
        }
    }

    // Test parameters are passed on the command line, or
    // via the Eclipse Run Configuration settings, arguments tab, VM parameters.
    // For example,
    //      -ea -Dseed=554654 -Dloop=1

    // Monkey test of the default character (grapheme cluster) break iterator for Locale.US.
    // The iteration count is taken from the "loop" property (default 500 when isQuick()
    // is true, 10000 otherwise) and the random seed from the "seed" property (default 1).
    @Test
    public void TestCharMonkey() {
        final int defaultIterations = isQuick() ? 500 : 10000;
        final int iterations = getIntProperty("loop", defaultIterations);
        final int randomSeed = getIntProperty("seed", 1);

        // Reference implementation first, then the ICU iterator it is checked against.
        final RBBICharMonkey referenceImpl = new RBBICharMonkey();
        final BreakIterator charIterator = BreakIterator.getCharacterInstance(Locale.US);

        RunMonkey(charIterator, referenceImpl, "Char", randomSeed, iterations);
    }

    // Monkey test of the default word break iterator for Locale.US.
    // The iteration count is taken from the "loop" property (default 500 when isQuick()
    // is true, 10000 otherwise) and the random seed from the "seed" property (default 1).
    @Test
    public void TestWordMonkey() {
        final int defaultIterations = isQuick() ? 500 : 10000;
        final int iterations = getIntProperty("loop", defaultIterations);
        final int randomSeed = getIntProperty("seed", 1);

        logln("Word Break Monkey Test");

        // Reference implementation first, then the ICU iterator it is checked against.
        final RBBIWordMonkey referenceImpl = new RBBIWordMonkey();
        final BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.US);

        RunMonkey(wordIterator, referenceImpl, "Word", randomSeed, iterations);
    }

    // Monkey test of the default line break iterator for Locale.US.
    // The iteration count is taken from the "loop" property (default 500 when isQuick()
    // is true, 10000 otherwise) and the random seed from the "seed" property (default 1).
    //
    // An IllegalArgumentException thrown by RunMonkey whose message is exactly
    // "Invalid code point U+-000001" is reported as a test error with a hint about its
    // probable cause; any other IllegalArgumentException is rethrown unchanged.
    @Test
    public void TestLineMonkey() {
        int loopCount = getIntProperty("loop", isQuick() ? 500 : 10000);
        int seed = getIntProperty("seed", 1);

        logln("Line Break Monkey Test");
        RBBILineMonkey  m = new RBBILineMonkey();
        BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
        try {
            RunMonkey(bi, m, "Line", seed, loopCount);
        } catch (IllegalArgumentException e) {
            // Compare constant-first: getMessage() may return null, and a
            // NullPointerException here would mask the exception that must be rethrown.
            if ("Invalid code point U+-000001".equals(e.getMessage())) {
                // Looks like you used class UnicodeSet instead of class XUnicodeSet
                // (note the leading 'X').
                // See the comment before the definition of class XUnicodeSet.
                errln("Probable program error: use XUnicodeSet in RBBILineMonkey code");
            } else {
                throw e;
            }
        }
    }

    // Monkey test of the default sentence break iterator for Locale.US.
    // The iteration count is taken from the "loop" property (default 500 when isQuick()
    // is true, 3000 otherwise) and the random seed from the "seed" property (default 1).
    @Test
    public void TestSentMonkey() {
        final int defaultIterations = isQuick() ? 500 : 3000;
        final int iterations = getIntProperty("loop", defaultIterations);
        final int randomSeed = getIntProperty("seed", 1);

        logln("Sentence Break Monkey Test");

        // Reference implementation first, then the ICU iterator it is checked against.
        final RBBISentenceMonkey referenceImpl = new RBBISentenceMonkey();
        final BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);

        RunMonkey(sentenceIterator, referenceImpl, "Sent", randomSeed, iterations);
    }
    //
    //  Round-trip monkey tests.
    //  Verify that break iterators created from the rule source from the default
    //    break iterators still pass the monkey test for the iterator type.
    //
    //  This is a major test for the Rule Compiler.  The default break iterators are built
    //  from pre-compiled binary rule data that was created using ICU4C; these
    //  round-trip rule recompile tests verify that the Java rule compiler can
    //  rebuild break iterators from the original source rules.
    //
    // Round-trip monkey test for character breaks: takes the string form of the default
    // Locale.US character break iterator, builds a new RuleBasedBreakIterator from it,
    // and runs the monkey test against that rebuilt iterator.
    // The iteration count is taken from the "loop" property (default 200 when isQuick()
    // is true, 2000 otherwise) and the random seed from the "seed" property (default 1).
    @Test
    public void TestRTCharMonkey() {
        final int defaultIterations = isQuick() ? 200 : 2000;
        final int iterations = getIntProperty("loop", defaultIterations);
        final int randomSeed = getIntProperty("seed", 1);

        final RBBICharMonkey referenceImpl = new RBBICharMonkey();

        // Recompile the default iterator's rules with the Java rule compiler.
        final String ruleSource = BreakIterator.getCharacterInstance(Locale.US).toString();
        final BreakIterator recompiled = new RuleBasedBreakIterator(ruleSource);

        RunMonkey(recompiled, referenceImpl, "RTChar", randomSeed, iterations);
    }

    // Round-trip monkey test for word breaks: takes the string form of the default
    // Locale.US word break iterator, builds a new RuleBasedBreakIterator from it,
    // and runs the monkey test against that rebuilt iterator.
    // The iteration count is taken from the "loop" property (default 200 when isQuick()
    // is true, 2000 otherwise) and the random seed from the "seed" property (default 1).
    @Test
    public void TestRTWordMonkey() {
        final int defaultIterations = isQuick() ? 200 : 2000;
        final int iterations = getIntProperty("loop", defaultIterations);
        final int randomSeed = getIntProperty("seed", 1);

        logln("Word Break Monkey Test");

        final RBBIWordMonkey referenceImpl = new RBBIWordMonkey();

        // Recompile the default iterator's rules with the Java rule compiler.
        final String ruleSource = BreakIterator.getWordInstance(Locale.US).toString();
        final BreakIterator recompiled = new RuleBasedBreakIterator(ruleSource);

        RunMonkey(recompiled, referenceImpl, "RTWord", randomSeed, iterations);
    }

    // Round-trip monkey test for line breaks: takes the string form of the default
    // Locale.US line break iterator, builds a new RuleBasedBreakIterator from it,
    // and runs the monkey test against that rebuilt iterator.
    // The iteration count is taken from the "loop" property (default 200 when isQuick()
    // is true, 2000 otherwise) and the random seed from the "seed" property (default 1).
    //
    // An IllegalArgumentException thrown by RunMonkey whose message is exactly
    // "Invalid code point U+-000001" is reported as a test error with a hint about its
    // probable cause; any other IllegalArgumentException is rethrown unchanged.
    @Test
    public void TestRTLineMonkey() {
        int loopCount = getIntProperty("loop", isQuick() ? 200 : 2000);
        int seed = getIntProperty("seed", 1);

        logln("Line Break Monkey Test");
        RBBILineMonkey  m = new RBBILineMonkey();
        BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
        String rules = bi.toString();
        BreakIterator rtbi = new RuleBasedBreakIterator(rules);
        try {
            RunMonkey(rtbi, m, "RTLine", seed, loopCount);
        } catch (IllegalArgumentException e) {
            // Compare constant-first: getMessage() may return null, and a
            // NullPointerException here would mask the exception that must be rethrown.
            if ("Invalid code point U+-000001".equals(e.getMessage())) {
                // Looks like you used class UnicodeSet instead of class XUnicodeSet
                // (note the leading 'X').
                // See the comment before the definition of class XUnicodeSet.
                errln("Probable program error: use XUnicodeSet in RBBILineMonkey code");
            } else {
                throw e;
            }
        }
    }

    // Round-trip monkey test for sentence breaks: takes the string form of the default
    // Locale.US sentence break iterator, builds a new RuleBasedBreakIterator from it,
    // and runs the monkey test against that rebuilt iterator.
    // The iteration count is taken from the "loop" property (default 200 when isQuick()
    // is true, 1000 otherwise) and the random seed from the "seed" property (default 1).
    @Test
    public void TestRTSentMonkey() {
        final int defaultIterations = isQuick() ? 200 : 1000;
        final int iterations = getIntProperty("loop", defaultIterations);
        final int randomSeed = getIntProperty("seed", 1);

        logln("Sentence Break Monkey Test");

        final RBBISentenceMonkey referenceImpl = new RBBISentenceMonkey();

        // Recompile the default iterator's rules with the Java rule compiler.
        final String ruleSource = BreakIterator.getSentenceInstance(Locale.US).toString();
        final BreakIterator recompiled = new RuleBasedBreakIterator(ruleSource);

        RunMonkey(recompiled, referenceImpl, "RTSent", randomSeed, iterations);
    }
}
