/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.camel.util;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.StringReader;
027import java.nio.CharBuffer;
028import java.nio.channels.Channels;
029import java.nio.channels.ReadableByteChannel;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.IllegalCharsetNameException;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.InputMismatchException;
035import java.util.Iterator;
036import java.util.Map;
037import java.util.NoSuchElementException;
038import java.util.Objects;
039import java.util.function.Function;
040import java.util.regex.Matcher;
041import java.util.regex.Pattern;
042
043public final class Scanner implements Iterator<String>, Closeable {
044
045    private static final Map<String, Pattern> CACHE = LRUCacheFactory.newLRUCache(7);
046
047    private static final String WHITESPACE_PATTERN = "\\s+";
048
049    private static final String FIND_ANY_PATTERN = "(?s).*";
050
051    private static final int BUFFER_SIZE = 1024;
052
053    private Readable source;
054    private Pattern delimPattern;
055    private Matcher matcher;
056    private CharBuffer buf;
057    private int position;
058    private boolean inputExhausted;
059    private boolean needInput;
060    private boolean skipped;
061    private int savedPosition = -1;
062    private boolean closed;
063    private IOException lastIOException;
064
065    public Scanner(InputStream source, String charsetName, String pattern) {
066        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
067    }
068
069    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
070        this(new FileInputStream(Objects.requireNonNull(source, "source")).getChannel(), charsetName, pattern);
071    }
072
073    public Scanner(String source, String pattern) {
074        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
075    }
076
077    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
078        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
079    }
080
081    public Scanner(Readable source, String pattern) {
082        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
083    }
084
085    private Scanner(Readable source, Pattern pattern) {
086        this.source = source;
087        delimPattern = pattern != null ? pattern : cachePattern(WHITESPACE_PATTERN);
088        buf = CharBuffer.allocate(BUFFER_SIZE);
089        buf.limit(0);
090        matcher = delimPattern.matcher(buf);
091        matcher.useTransparentBounds(true);
092        matcher.useAnchoringBounds(false);
093    }
094
095    private static CharsetDecoder toDecoder(String charsetName) {
096        try {
097            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
098            return cs.newDecoder();
099        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
100            throw new IllegalArgumentException(e);
101        }
102    }
103
104    public boolean hasNext() {
105        checkClosed();
106        saveState();
107        while (!inputExhausted) {
108            if (hasTokenInBuffer()) {
109                revertState();
110                return true;
111            }
112            readMore();
113        }
114        boolean result = hasTokenInBuffer();
115        revertState();
116        return result;
117    }
118
119    public String next() {
120        checkClosed();
121        while (true) {
122            String token = getCompleteTokenInBuffer();
123            if (token != null) {
124                skipped = false;
125                return token;
126            }
127            if (needInput) {
128                readMore();
129            } else {
130                throwFor();
131            }
132        }
133    }
134
135    private void saveState() {
136        savedPosition = position;
137    }
138
139    private void revertState() {
140        position = savedPosition;
141        savedPosition = -1;
142        skipped = false;
143    }
144
145    private void readMore() {
146        if (buf.limit() == buf.capacity()) {
147            expandBuffer();
148        }
149        int p = buf.position();
150        buf.position(buf.limit());
151        buf.limit(buf.capacity());
152        int n;
153        try {
154            n = source.read(buf);
155        } catch (IOException ioe) {
156            lastIOException = ioe;
157            n = -1;
158        }
159        if (n == -1) {
160            inputExhausted = true;
161            needInput = false;
162        } else if (n > 0) {
163            needInput = false;
164        }
165        buf.limit(buf.position());
166        buf.position(p);
167    }
168
169    private void expandBuffer() {
170        int offset = savedPosition == -1 ? position : savedPosition;
171        buf.position(offset);
172        if (offset > 0) {
173            buf.compact();
174            translateSavedIndexes(offset);
175            position -= offset;
176            buf.flip();
177        } else {
178            int newSize = buf.capacity() * 2;
179            CharBuffer newBuf = CharBuffer.allocate(newSize);
180            newBuf.put(buf);
181            newBuf.flip();
182            translateSavedIndexes(offset);
183            position -= offset;
184            buf = newBuf;
185            matcher.reset(buf);
186        }
187    }
188
189    private void translateSavedIndexes(int offset) {
190        if (savedPosition != -1) {
191            savedPosition -= offset;
192        }
193    }
194
195    private void throwFor() {
196        skipped = false;
197        if (inputExhausted && position == buf.limit()) {
198            throw new NoSuchElementException();
199        } else {
200            throw new InputMismatchException();
201        }
202    }
203
204    private boolean hasTokenInBuffer() {
205        matcher.usePattern(delimPattern);
206        matcher.region(position, buf.limit());
207        if (matcher.lookingAt()) {
208            position = matcher.end();
209        }
210        return position != buf.limit();
211    }
212
213    private String getCompleteTokenInBuffer() {
214        matcher.usePattern(delimPattern);
215        if (!skipped) {
216            matcher.region(position, buf.limit());
217            if (matcher.lookingAt()) {
218                if (matcher.hitEnd() && !inputExhausted) {
219                    needInput = true;
220                    return null;
221                }
222                skipped = true;
223                position = matcher.end();
224            }
225        }
226        if (position == buf.limit()) {
227            if (inputExhausted) {
228                return null;
229            }
230            needInput = true;
231            return null;
232        }
233        matcher.region(position, buf.limit());
234        boolean foundNextDelim = matcher.find();
235        if (foundNextDelim && (matcher.end() == position)) {
236            foundNextDelim = matcher.find();
237        }
238        if (foundNextDelim) {
239            if (matcher.requireEnd() && !inputExhausted) {
240                needInput = true;
241                return null;
242            }
243            int tokenEnd = matcher.start();
244            matcher.usePattern(cachePattern(FIND_ANY_PATTERN));
245            matcher.region(position, tokenEnd);
246            if (matcher.matches()) {
247                String s = matcher.group();
248                position = matcher.end();
249                return s;
250            } else {
251                return null;
252            }
253        }
254        if (inputExhausted) {
255            matcher.usePattern(cachePattern(FIND_ANY_PATTERN));
256            matcher.region(position, buf.limit());
257            if (matcher.matches()) {
258                String s = matcher.group();
259                position = matcher.end();
260                return s;
261            }
262            return null;
263        }
264        needInput = true;
265        return null;
266    }
267
268    private void checkClosed() {
269        if (closed) {
270            throw new IllegalStateException();
271        }
272    }
273
274    public void close() {
275        if (!closed) {
276            if (source instanceof Closeable) {
277                try {
278                    ((Closeable) source).close();
279                } catch (IOException e) {
280                    lastIOException = e;
281                }
282            }
283            closed = true;
284        }
285    }
286
287    public IOException ioException() {
288        return lastIOException;
289    }
290
291    private static Pattern cachePattern(String pattern) {
292        if (pattern == null) {
293            return null;
294        }
295        return CACHE.computeIfAbsent(pattern, new Function<String, Pattern>() {
296            @Override
297            public Pattern apply(String s) {
298                return Pattern.compile(s);
299            }
300        });
301    }
302
303}