001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.util.ArrayList;
020import java.util.List;
021import java.util.StringTokenizer;
022
023/**
024 * PathMatcher implementation for Ant-style path patterns. Examples are provided below.
025 * <p>
026 * Part of this mapping code has been kindly borrowed from <a href="http://ant.apache.org">Apache Ant</a>
027 * and <a href="http://springframework.org">Spring Framework</a>.
028 * <p>
029 * The mapping matches URLs using the following rules:<br>
030 * <ul>
031 *   <li>? matches one character</li>
032 *   <li>* matches zero or more characters</li>
033 *   <li>** matches zero or more 'directories' in a path</li>
034 * </ul>
035 * <p>
036 * Some examples:<br>
037 * <ul>
038 *   <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
039 *       <code>com/tast.jsp</code> or <code>com/txst.jsp</code>
040 *   </li>
041 *   <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
042 *       <code>com</code> directory
043 *   </li>
044 *   <li><code>com/&#42;&#42;/test.jsp</code> - matches all <code>test.jsp</code>
045 *       files underneath the <code>com</code> path
046 *   </li>
047 *   <li><code>org/springframework/&#42;&#42;/*.jsp</code> - matches all
048 *       <code>.jsp</code> files underneath the <code>org/springframework</code> path
049 *   </li>
050 *   <li><code>org/&#42;&#42;/servlet/bla.jsp</code> - matches
051 *       <code>org/springframework/servlet/bla.jsp</code> but also
052 *       <code>org/springframework/testing/servlet/bla.jsp</code> and
053 *       <code>org/servlet/bla.jsp</code>
054 *   </li>
055 * </ul>
056 */
057public class AntPathMatcher {
058    public static final AntPathMatcher INSTANCE = new AntPathMatcher();
059
060    /** Default path separator: "/" */
061    public static final String DEFAULT_PATH_SEPARATOR = "/";
062
063    private String pathSeparator = DEFAULT_PATH_SEPARATOR;
064
065    /**
066     * Set the path separator to use for pattern parsing. Default is "/", as in
067     * Ant.
068     */
069    public void setPathSeparator(String pathSeparator) {
070        this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
071    }
072
073    public boolean isPattern(String path) {
074        return path.indexOf('*') != -1 || path.indexOf('?') != -1;
075    }
076
077    public boolean match(String pattern, String path) {
078        return match(pattern, path, true);
079    }
080
081    public boolean matchStart(String pattern, String path) {
082        return matchStart(pattern, path, true);
083    }
084
085    public boolean match(String pattern, String path, boolean isCaseSensitive) {
086        return doMatch(pattern, path, true, isCaseSensitive);
087    }
088
089    public boolean matchStart(String pattern, String path, boolean isCaseSensitive) {
090        return doMatch(pattern, path, false, isCaseSensitive);
091    }
092
093    /**
094     * Actually match the given <code>path</code> against the given
095     * <code>pattern</code>.
096     * 
097     * @param pattern the pattern to match against
098     * @param path the path String to test
099     * @param fullMatch whether a full pattern match is required (else a pattern
100     *            match as far as the given base path goes is sufficient)
101     * @param isCaseSensitive Whether or not matching should be performed
102     *                        case sensitively.
103     * @return <code>true</code> if the supplied <code>path</code> matched,
104     *         <code>false</code> if it didn't
105     */
106    protected boolean doMatch(String pattern, String path, boolean fullMatch, boolean isCaseSensitive) {
107        if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
108            return false;
109        }
110
111        String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
112        String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
113
114        int pattIdxStart = 0;
115        int pattIdxEnd = pattDirs.length - 1;
116        int pathIdxStart = 0;
117        int pathIdxEnd = pathDirs.length - 1;
118
119        // Match all elements up to the first **
120        while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
121            String patDir = pattDirs[pattIdxStart];
122            if ("**".equals(patDir)) {
123                break;
124            }
125            if (!matchStrings(patDir, pathDirs[pathIdxStart], isCaseSensitive)) {
126                return false;
127            }
128            pattIdxStart++;
129            pathIdxStart++;
130        }
131
132        if (pathIdxStart > pathIdxEnd) {
133            // Path is exhausted, only match if rest of pattern is * or **'s
134            if (pattIdxStart > pattIdxEnd) {
135                return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
136                    .endsWith(this.pathSeparator);
137            }
138            if (!fullMatch) {
139                return true;
140            }
141            if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
142                && path.endsWith(this.pathSeparator)) {
143                return true;
144            }
145            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
146                if (!pattDirs[i].equals("**")) {
147                    return false;
148                }
149            }
150            return true;
151        } else if (pattIdxStart > pattIdxEnd) {
152            // String not exhausted, but pattern is. Failure.
153            return false;
154        } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
155            // Path start definitely matches due to "**" part in pattern.
156            return true;
157        }
158
159        // up to last '**'
160        while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
161            String patDir = pattDirs[pattIdxEnd];
162            if (patDir.equals("**")) {
163                break;
164            }
165            if (!matchStrings(patDir, pathDirs[pathIdxEnd], isCaseSensitive)) {
166                return false;
167            }
168            pattIdxEnd--;
169            pathIdxEnd--;
170        }
171        if (pathIdxStart > pathIdxEnd) {
172            // String is exhausted
173            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
174                if (!pattDirs[i].equals("**")) {
175                    return false;
176                }
177            }
178            return true;
179        }
180
181        while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
182            int patIdxTmp = -1;
183            for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
184                if (pattDirs[i].equals("**")) {
185                    patIdxTmp = i;
186                    break;
187                }
188            }
189            if (patIdxTmp == pattIdxStart + 1) {
190                // '**/**' situation, so skip one
191                pattIdxStart++;
192                continue;
193            }
194            // Find the pattern between padIdxStart & padIdxTmp in str between
195            // strIdxStart & strIdxEnd
196            int patLength = patIdxTmp - pattIdxStart - 1;
197            int strLength = pathIdxEnd - pathIdxStart + 1;
198            int foundIdx = -1;
199
200        strLoop:
201            for (int i = 0; i <= strLength - patLength; i++) {
202                for (int j = 0; j < patLength; j++) {
203                    String subPat = pattDirs[pattIdxStart + j + 1];
204                    String subStr = pathDirs[pathIdxStart + i + j];
205                    if (!matchStrings(subPat, subStr, isCaseSensitive)) {
206                        continue strLoop;
207                    }
208                }
209                foundIdx = pathIdxStart + i;
210                break;
211            }
212
213            if (foundIdx == -1) {
214                return false;
215            }
216
217            pattIdxStart = patIdxTmp;
218            pathIdxStart = foundIdx + patLength;
219        }
220
221        for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
222            if (!pattDirs[i].equals("**")) {
223                return false;
224            }
225        }
226
227        return true;
228    }
229
230    /**
231     * Tests whether or not a string matches against a pattern. The pattern may
232     * contain two special characters:<br>
233     * '*' means zero or more characters<br>
234     * '?' means one and only one character
235     * 
236     * @param pattern pattern to match against. Must not be <code>null</code>.
237     * @param str string which must be matched against the pattern. Must not be
238     *            <code>null</code>.
239     * @param caseSensitive Whether or not matching should be performed
240     *                      case sensitively.
241     * @return <code>true</code> if the string matches against the pattern, or
242     *         <code>false</code> otherwise.
243     */
244    private boolean matchStrings(String pattern, String str, boolean caseSensitive) {
245        char[] patArr = pattern.toCharArray();
246        char[] strArr = str.toCharArray();
247        int patIdxStart = 0;
248        int patIdxEnd = patArr.length - 1;
249        int strIdxStart = 0;
250        int strIdxEnd = strArr.length - 1;
251        char ch;
252
253        boolean containsStar = false;
254        for (char c : patArr) {
255            if (c == '*') {
256                containsStar = true;
257                break;
258            }
259        }
260
261        if (!containsStar) {
262            // No '*'s, so we make a shortcut
263            if (patIdxEnd != strIdxEnd) {
264                return false; // Pattern and string do not have the same size
265            }
266            for (int i = 0; i <= patIdxEnd; i++) {
267                ch = patArr[i];
268                if (ch != '?') {
269                    if (different(caseSensitive, ch, strArr[i])) {
270                        return false;
271                        // Character mismatch
272                    }
273                }
274            }
275            return true; // String matches against pattern
276        }
277
278        if (patIdxEnd == 0) {
279            return true; // Pattern contains only '*', which matches anything
280        }
281
282        // Process characters before first star
283        while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
284            if (ch != '?') {
285                if (different(caseSensitive, ch, strArr[strIdxStart])) {
286                    return false;
287                    // Character mismatch
288                }
289            }
290            patIdxStart++;
291            strIdxStart++;
292        }
293        if (strIdxStart > strIdxEnd) {
294            // All characters in the string are used. Check if only '*'s are
295            // left in the pattern. If so, we succeeded. Otherwise failure.
296            for (int i = patIdxStart; i <= patIdxEnd; i++) {
297                if (patArr[i] != '*') {
298                    return false;
299                }
300            }
301            return true;
302        }
303
304        // Process characters after last star
305        while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
306            if (ch != '?') {
307                if (different(caseSensitive, ch, strArr[strIdxEnd])) {
308                    return false;
309                    // Character mismatch
310                }
311            }
312            patIdxEnd--;
313            strIdxEnd--;
314        }
315        if (strIdxStart > strIdxEnd) {
316            // All characters in the string are used. Check if only '*'s are
317            // left in the pattern. If so, we succeeded. Otherwise failure.
318            for (int i = patIdxStart; i <= patIdxEnd; i++) {
319                if (patArr[i] != '*') {
320                    return false;
321                }
322            }
323            return true;
324        }
325
326        // process pattern between stars. padIdxStart and patIdxEnd point
327        // always to a '*'.
328        while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
329            int patIdxTmp = -1;
330            for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
331                if (patArr[i] == '*') {
332                    patIdxTmp = i;
333                    break;
334                }
335            }
336            if (patIdxTmp == patIdxStart + 1) {
337                // Two stars next to each other, skip the first one.
338                patIdxStart++;
339                continue;
340            }
341            // Find the pattern between padIdxStart & padIdxTmp in str between
342            // strIdxStart & strIdxEnd
343            int patLength = patIdxTmp - patIdxStart - 1;
344            int strLength = strIdxEnd - strIdxStart + 1;
345            int foundIdx = -1;
346        strLoop: 
347            for (int i = 0; i <= strLength - patLength; i++) {
348                for (int j = 0; j < patLength; j++) {
349                    ch = patArr[patIdxStart + j + 1];
350                    if (ch != '?') {
351                        if (different(caseSensitive, ch, strArr[strIdxStart + i + j])) {
352                            continue strLoop;
353                        }
354                    }
355                }
356
357                foundIdx = strIdxStart + i;
358                break;
359            }
360
361            if (foundIdx == -1) {
362                return false;
363            }
364
365            patIdxStart = patIdxTmp;
366            strIdxStart = foundIdx + patLength;
367        }
368
369        // All characters in the string are used. Check if only '*'s are left
370        // in the pattern. If so, we succeeded. Otherwise failure.
371        for (int i = patIdxStart; i <= patIdxEnd; i++) {
372            if (patArr[i] != '*') {
373                return false;
374            }
375        }
376
377        return true;
378    }
379
380    /**
381     * Given a pattern and a full path, determine the pattern-mapped part.
382     * <p>
383     * For example:
384     * <ul>
385     * <li>'<code>/docs/cvs/commit.html</code>' and '
386     * <code>/docs/cvs/commit.html</code> -> ''</li>
387     * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -> '
388     * <code>cvs/commit</code>'</li>
389     * <li>'<code>/docs/cvs/*.html</code>' and '
390     * <code>/docs/cvs/commit.html</code> -> '<code>commit.html</code>'</li>
391     * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -> '
392     * <code>cvs/commit</code>'</li>
393     * <li>'<code>/docs/**\/*.html</code>' and '
394     * <code>/docs/cvs/commit.html</code> -> '<code>cvs/commit.html</code>'</li>
395     * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
396     * <code>docs/cvs/commit.html</code>'</li>
397     * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
398     * <code>/docs/cvs/commit.html</code>'</li>
399     * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -> '
400     * <code>/docs/cvs/commit.html</code>'</li>
401     * </ul>
402     * <p>
403     * Assumes that {@link #match} returns <code>true</code> for '
404     * <code>pattern</code>' and '<code>path</code>', but does
405     * <strong>not</strong> enforce this.
406     */
407    public String extractPathWithinPattern(String pattern, String path) {
408        String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
409        String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
410
411        StringBuilder buffer = new StringBuilder();
412
413        // Add any path parts that have a wildcarded pattern part.
414        int puts = 0;
415        for (int i = 0; i < patternParts.length; i++) {
416            String patternPart = patternParts[i];
417            if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
418                if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
419                    buffer.append(this.pathSeparator);
420                }
421                buffer.append(pathParts[i]);
422                puts++;
423            }
424        }
425
426        // Append any trailing path parts.
427        for (int i = patternParts.length; i < pathParts.length; i++) {
428            if (puts > 0 || i > 0) {
429                buffer.append(this.pathSeparator);
430            }
431            buffer.append(pathParts[i]);
432        }
433
434        return buffer.toString();
435    }
436
437    /**
438     * Tokenize the given String into a String array via a StringTokenizer.
439     * Trims tokens and omits empty tokens.
440     * <p>
441     * The given delimiters string is supposed to consist of any number of
442     * delimiter characters. Each of those characters can be used to separate
443     * tokens. A delimiter is always a single character; for multi-character
444     * delimiters, consider using <code>delimitedListToStringArray</code>
445     * 
446     * @param str the String to tokenize
447     * @param delimiters the delimiter characters, assembled as String (each of
448     *            those characters is individually considered as delimiter).
449     * @return an array of the tokens
450     * @see java.util.StringTokenizer
451     * @see java.lang.String#trim()
452     */
453    public static String[] tokenizeToStringArray(String str, String delimiters) {
454        if (str == null) {
455            return null;
456        }
457        StringTokenizer st = new StringTokenizer(str, delimiters);
458        List<String> tokens = new ArrayList<>();
459        while (st.hasMoreTokens()) {
460            String token = st.nextToken();
461            token = token.trim();
462            if (token.length() > 0) {
463                tokens.add(token);
464            }
465        }
466        return tokens.toArray(new String[tokens.size()]);
467    }
468
469    private static boolean different(boolean caseSensitive, char ch, char other) {
470        return caseSensitive ? ch != other : Character.toUpperCase(ch) != Character.toUpperCase(other);
471    }
472}