package org.pageseeder.diffx.load.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.pageseeder.diffx.config.WhiteSpaceProcessing;
import org.pageseeder.diffx.token.TextToken;
import org.pageseeder.diffx.token.XMLToken;
import org.pageseeder.diffx.token.impl.CharactersTokenBase;
import org.pageseeder.diffx.token.impl.IgnorableSpaceToken;
import org.pageseeder.diffx.token.impl.SpaceToken;

/* loaded from: classes.dex */
/**
 * Text tokenizer that splits a character sequence on runs of whitespace.
 *
 * <p>Each maximal run of non-whitespace characters becomes one token, and each
 * run of whitespace optionally becomes one token depending on the configured
 * {@link WhiteSpaceProcessing} mode. Tokens are cached per instance by their
 * exact text, so repeated words or whitespace runs yield the same token object.
 *
 * <p>The cache is a plain {@link HashMap} with no synchronization in this class.
 */
public final class TokenizerByWord implements TextTokenizer {

    /** Matches one run of consecutive whitespace characters; compiled once and reused by every call. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /* renamed from: a, reason: collision with root package name */
    /**
     * Cache of tokens created so far, keyed by token text. Word keys never contain
     * whitespace and whitespace keys contain nothing else, so the two kinds of
     * entries cannot collide.
     */
    public final HashMap f11209a = new HashMap();

    /* renamed from: b, reason: collision with root package name */
    /** Whitespace handling mode that decides whether and how whitespace tokens are produced. */
    public final WhiteSpaceProcessing f11210b;

    /**
     * Creates a tokenizer using the given whitespace handling mode.
     *
     * @param whiteSpaceProcessing the mode compared against {@code WhiteSpaceProcessing.g}
     *                             and {@code WhiteSpaceProcessing.h} when whitespace is found
     */
    public TokenizerByWord(WhiteSpaceProcessing whiteSpaceProcessing) {
        this.f11210b = whiteSpaceProcessing;
    }

    /**
     * Splits the given text into word tokens and, depending on the mode, whitespace tokens.
     *
     * @param charSequence the text to tokenize
     * @return the tokens in document order; the shared immutable empty list when the text is empty
     * @throws NullPointerException if {@code charSequence} is {@code null}
     */
    @Override // org.pageseeder.diffx.load.text.TextTokenizer
    public final List a(CharSequence charSequence) {
        if (charSequence == null) {
            throw new NullPointerException("Character sequence is null");
        }
        final int length = charSequence.length();
        if (length == 0) {
            return Collections.emptyList();
        }
        // Every token covers at least one character, so the length is an upper bound on the token count.
        ArrayList tokens = new ArrayList(length);
        Matcher matcher = WHITESPACE_RUN.matcher(charSequence);
        int wordStart = 0;
        while (matcher.find()) {
            // Text between the previous whitespace run (or the start) and this one is a word.
            if (wordStart != matcher.start()) {
                tokens.add(wordToken(charSequence.subSequence(wordStart, matcher.start()).toString()));
            }
            // Mode "g" drops whitespace entirely; every other mode emits a token for the run.
            if (this.f11210b != WhiteSpaceProcessing.g) {
                tokens.add(spaceToken(charSequence.subSequence(matcher.start(), matcher.end()).toString()));
            }
            wordStart = matcher.end();
        }
        // Trailing word after the last whitespace run (or the whole text if there was none).
        if (wordStart != length) {
            tokens.add(wordToken(charSequence.subSequence(wordStart, length).toString()));
        }
        return tokens;
    }

    /** Returns the cached token for a word, creating and caching a {@link CharactersTokenBase} on first use. */
    private XMLToken wordToken(String text) {
        // The cast is kept from the original lookup: cached entries are expected to be text tokens.
        XMLToken token = (TextToken) this.f11209a.get(text);
        if (token == null) {
            token = new CharactersTokenBase(text);
            this.f11209a.put(text, token);
        }
        return token;
    }

    /**
     * Returns the cached token for a whitespace run, creating and caching it on first use:
     * an {@link IgnorableSpaceToken} in mode "h", otherwise whatever {@code SpaceToken.d} returns.
     */
    private TextToken spaceToken(String text) {
        TextToken token = (TextToken) this.f11209a.get(text);
        if (token == null) {
            token = this.f11210b == WhiteSpaceProcessing.h ? new IgnorableSpaceToken(text) : SpaceToken.d(text);
            this.f11209a.put(text, token);
        }
        return token;
    }
}
