/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.util;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.nio.CharBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.InputMismatchException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A small tokenizer that splits character input into tokens separated by
 * matches of a delimiter regular expression.
 * <p>
 * Input is pulled from a {@link Readable} into an internal {@link CharBuffer}
 * on demand. When no delimiter pattern is given, runs of whitespace
 * ({@code \s+}) are used. I/O errors raised while reading do not propagate:
 * they end the input and the most recent one is available from
 * {@link #ioException()}.
 */
public final class Scanner implements Iterator<String>, Closeable {

    /** Compiled patterns keyed by their regex source, obtained from {@code LRUCacheFactory.newLRUCache(7)}. */
    private static final Map<String, Pattern> CACHE = LRUCacheFactory.newLRUCache(7);

    /** Default delimiter: one or more whitespace characters. */
    private static final String WHITESPACE_PATTERN = "\\s+";

    /** Matches any character sequence, including line terminators; used to extract a token's text. */
    private static final String FIND_ANY_PATTERN = "(?s).*";

    /** Initial capacity, in chars, of the internal buffer. */
    private static final int BUFFER_SIZE = 1024;

    private final Readable source;
    private final Pattern delimPattern;
    private final Matcher matcher;
    // Always kept in "read" orientation: chars in [0, limit) are valid input.
    private CharBuffer buf;
    // Index into buf of the next char to be examined.
    private int position;
    // True once the source reported end of input (or failed with an IOException).
    private boolean inputExhausted;
    // True when a token decision cannot be made without reading more input.
    private boolean needInput;
    // True when leading delimiters were already skipped for the pending token.
    private boolean skipped;
    // Position saved by hasNext() so that it can be restored afterwards; -1 when nothing is saved.
    private int savedPosition = -1;
    private boolean closed;
    private IOException lastIOException;

    /**
     * Creates a scanner reading from the given stream.
     *
     * @param  source                   the stream to read from, must not be {@code null}
     * @param  charsetName              name of the charset used to decode bytes, or {@code null}
     *                                  for the platform default charset
     * @param  pattern                  delimiter regex, or {@code null} for whitespace
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(InputStream source, String charsetName, String pattern) {
        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given file.
     * <p>
     * The charset and the delimiter pattern are resolved <em>before</em> the
     * file is opened, so an invalid charset name or regex cannot leave an
     * open file handle behind.
     *
     * @param  source                   the file to read from, must not be {@code null}
     * @param  charsetName              name of the charset used to decode bytes, or {@code null}
     *                                  for the platform default charset
     * @param  pattern                  delimiter regex, or {@code null} for whitespace
     * @throws FileNotFoundException    if the file cannot be opened for reading
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
        this(Objects.requireNonNull(source, "source"), toDecoder(charsetName), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given string.
     *
     * @param source  the text to tokenize, must not be {@code null}
     * @param pattern delimiter regex, or {@code null} for whitespace
     */
    public Scanner(String source, String pattern) {
        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given channel.
     *
     * @param  source                   the channel to read from, must not be {@code null}
     * @param  charsetName              name of the charset used to decode bytes, or {@code null}
     *                                  for the platform default charset
     * @param  pattern                  delimiter regex, or {@code null} for whitespace
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
        // -1 asks Channels.newReader for its default byte-buffer capacity
        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given character source.
     *
     * @param source  the source to read from, must not be {@code null}
     * @param pattern delimiter regex, or {@code null} for whitespace
     */
    public Scanner(Readable source, String pattern) {
        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
    }

    /**
     * Opens the file only after the decoder and pattern have been resolved successfully.
     */
    private Scanner(File source, CharsetDecoder decoder, Pattern pattern) throws FileNotFoundException {
        // -1 asks Channels.newReader for its default byte-buffer capacity
        this(Channels.newReader(new FileInputStream(source).getChannel(), decoder, -1), pattern);
    }

    private Scanner(Readable source, Pattern pattern) {
        this.source = source;
        delimPattern = pattern != null ? pattern : cachePattern(WHITESPACE_PATTERN);
        buf = CharBuffer.allocate(BUFFER_SIZE);
        // start empty: nothing has been read yet
        buf.limit(0);
        matcher = delimPattern.matcher(buf);
        // let lookaround/boundary constructs see text outside the current region ...
        matcher.useTransparentBounds(true);
        // ... and do not treat region edges as line/input anchors
        matcher.useAnchoringBounds(false);
    }

    /**
     * Resolves a charset name to a fresh decoder.
     *
     * @param  charsetName              the charset name, or {@code null} for the platform default
     * @return                          a new decoder for the charset
     * @throws IllegalArgumentException if the name is illegal or the charset is unsupported
     */
    private static CharsetDecoder toDecoder(String charsetName) {
        try {
            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
            return cs.newDecoder();
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Tells whether another token is available, reading more input if required.
     * The scanner position is left unchanged.
     *
     * @return                       {@code true} if a further token exists
     * @throws IllegalStateException if this scanner has been closed
     */
    @Override
    public boolean hasNext() {
        checkClosed();
        saveState();
        while (!inputExhausted) {
            if (hasTokenInBuffer()) {
                revertState();
                return true;
            }
            readMore();
        }
        // source is drained: whatever is left in the buffer decides the answer
        boolean result = hasTokenInBuffer();
        revertState();
        return result;
    }

    /**
     * Returns the next token, reading more input if required.
     *
     * @return                        the next token
     * @throws NoSuchElementException if the input is exhausted and no characters remain;
     *                                an {@link InputMismatchException} (a subclass) is thrown
     *                                when no token can be produced for any other reason
     * @throws IllegalStateException  if this scanner has been closed
     */
    @Override
    public String next() {
        checkClosed();
        while (true) {
            String token = getCompleteTokenInBuffer();
            if (token != null) {
                skipped = false;
                return token;
            }
            if (needInput) {
                readMore();
            } else {
                throwFor();
            }
        }
    }

    /** Remembers the current position so that {@link #revertState()} can restore it. */
    private void saveState() {
        savedPosition = position;
    }

    /** Restores the position remembered by {@link #saveState()} and clears the saved state. */
    private void revertState() {
        position = savedPosition;
        savedPosition = -1;
        skipped = false;
    }

    /**
     * Appends more characters from the source to the buffer, making room first
     * if the buffer is full. An {@link IOException} is recorded and treated as
     * end of input.
     */
    private void readMore() {
        if (buf.limit() == buf.capacity()) {
            expandBuffer();
        }
        // switch the buffer to "write" orientation: append after the valid data
        int p = buf.position();
        buf.position(buf.limit());
        buf.limit(buf.capacity());
        int n;
        try {
            n = source.read(buf);
        } catch (IOException ioe) {
            lastIOException = ioe;
            n = -1;
        }
        if (n == -1) {
            inputExhausted = true;
            needInput = false;
        } else if (n > 0) {
            needInput = false;
        }
        // back to "read" orientation: limit marks the end of the valid data
        buf.limit(buf.position());
        buf.position(p);
    }

    /**
     * Makes room in a full buffer: discards already consumed characters when
     * there are any, otherwise doubles the buffer capacity.
     */
    private void expandBuffer() {
        // everything before the saved (or current) position is no longer needed
        int offset = savedPosition == -1 ? position : savedPosition;
        buf.position(offset);
        if (offset > 0) {
            // gain space by dropping the consumed prefix
            buf.compact();
            translateSavedIndexes(offset);
            position -= offset;
            buf.flip();
        } else {
            // nothing to drop: grow the buffer (offset is 0 here, so the index shifts are no-ops)
            int newSize = buf.capacity() * 2;
            CharBuffer newBuf = CharBuffer.allocate(newSize);
            newBuf.put(buf);
            newBuf.flip();
            translateSavedIndexes(offset);
            position -= offset;
            buf = newBuf;
            matcher.reset(buf);
        }
    }

    /** Shifts the saved position, if any, after {@code offset} leading chars were dropped from the buffer. */
    private void translateSavedIndexes(int offset) {
        if (savedPosition != -1) {
            savedPosition -= offset;
        }
    }

    /**
     * Throws the exception that reports a failed {@link #next()}:
     * {@link NoSuchElementException} when the input is exhausted and fully
     * consumed, {@link InputMismatchException} otherwise.
     */
    private void throwFor() {
        skipped = false;
        if (inputExhausted && position == buf.limit()) {
            throw new NoSuchElementException();
        } else {
            throw new InputMismatchException();
        }
    }

    /**
     * Skips delimiters at the current position and reports whether any
     * characters remain in the buffer after them. Advances {@code position}
     * past the skipped delimiters.
     */
    private boolean hasTokenInBuffer() {
        matcher.usePattern(delimPattern);
        matcher.region(position, buf.limit());
        if (matcher.lookingAt()) {
            position = matcher.end();
        }
        return position != buf.limit();
    }

    /**
     * Extracts the next complete token from the buffer.
     *
     * @return the token, or {@code null} when none can be produced yet; in that
     *         case {@code needInput} is set if reading more input may help
     */
    private String getCompleteTokenInBuffer() {
        matcher.usePattern(delimPattern);
        // skip leading delimiters, but only once per token
        if (!skipped) {
            matcher.region(position, buf.limit());
            if (matcher.lookingAt()) {
                // more input could extend the delimiter run: wait for it
                if (matcher.hitEnd() && !inputExhausted) {
                    needInput = true;
                    return null;
                }
                skipped = true;
                position = matcher.end();
            }
        }
        // sitting at the end of the buffer: no token without more input
        if (position == buf.limit()) {
            if (inputExhausted) {
                return null;
            }
            needInput = true;
            return null;
        }
        // look for the delimiter that terminates the token
        matcher.region(position, buf.limit());
        boolean foundNextDelim = matcher.find();
        if (foundNextDelim && (matcher.end() == position)) {
            // zero-length match at the current position: search again for the next one
            foundNextDelim = matcher.find();
        }
        if (foundNextDelim) {
            // more input could invalidate this delimiter match: wait for it
            if (matcher.requireEnd() && !inputExhausted) {
                needInput = true;
                return null;
            }
            int tokenEnd = matcher.start();
            matcher.usePattern(cachePattern(FIND_ANY_PATTERN));
            matcher.region(position, tokenEnd);
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            } else {
                return null;
            }
        }
        // no terminating delimiter, but the input is over: the rest is the token
        if (inputExhausted) {
            matcher.usePattern(cachePattern(FIND_ANY_PATTERN));
            matcher.region(position, buf.limit());
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            }
            return null;
        }
        needInput = true;
        return null;
    }

    /** Fails with {@link IllegalStateException} if this scanner has been closed. */
    private void checkClosed() {
        if (closed) {
            throw new IllegalStateException();
        }
    }

    /**
     * Closes this scanner and, if it implements {@link Closeable}, the
     * underlying source. An {@link IOException} raised while closing is
     * recorded rather than thrown. Closing an already closed scanner has no
     * effect.
     */
    @Override
    public void close() {
        if (!closed) {
            if (source instanceof Closeable) {
                try {
                    ((Closeable) source).close();
                } catch (IOException e) {
                    lastIOException = e;
                }
            }
            closed = true;
        }
    }

    /**
     * Returns the last {@link IOException} recorded while reading from or
     * closing the source.
     *
     * @return the recorded exception, or {@code null} if none occurred
     */
    public IOException ioException() {
        return lastIOException;
    }

    /**
     * Returns the compiled form of the given regex, compiling and caching it on first use.
     *
     * @param  pattern the regex source, may be {@code null}
     * @return         the compiled pattern, or {@code null} if {@code pattern} is {@code null}
     */
    private static Pattern cachePattern(String pattern) {
        if (pattern == null) {
            return null;
        }
        return CACHE.computeIfAbsent(pattern, new Function<String, Pattern>() {
            @Override
            public Pattern apply(String s) {
                return Pattern.compile(s);
            }
        });
    }

}