001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.util;
018    
019    import java.util.ArrayList;
020    import java.util.List;
021    import java.util.StringTokenizer;
022    
023    /**
024     * PathMatcher implementation for Ant-style path patterns. Examples are provided below.
025     * <p>
026     * Part of this mapping code has been kindly borrowed from <a href="http://ant.apache.org">Apache Ant</a>
027     * and <a href="http://springframework.org">Spring Framework</a>.
028     * <p>
029     * The mapping matches URLs using the following rules:<br>
030     * <ul>
031     *   <li>? matches one character</li>
032     *   <li>* matches zero or more characters</li>
033     *   <li>** matches zero or more 'directories' in a path</li>
034     * </ul>
035     * <p>
036     * Some examples:<br>
037     * <ul>
038     *   <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
039     *       <code>com/tast.jsp</code> or <code>com/txst.jsp</code>
040     *   </li>
041     *   <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
042     *       <code>com</code> directory
043     *   </li>
044     *   <li><code>com/&#42;&#42;/test.jsp</code> - matches all <code>test.jsp</code>
045     *       files underneath the <code>com</code> path
046     *   </li>
047     *   <li><code>org/springframework/&#42;&#42;/*.jsp</code> - matches all
048     *       <code>.jsp</code> files underneath the <code>org/springframework</code> path
049     *   </li>
050     *   <li><code>org/&#42;&#42;/servlet/bla.jsp</code> - matches
051     *       <code>org/springframework/servlet/bla.jsp</code> but also
052     *       <code>org/springframework/testing/servlet/bla.jsp</code> and
053     *       <code>org/servlet/bla.jsp</code>
054     *   </li>
055     * </ul>
056     */
057    public class AntPathMatcher {
058    
059        /** Default path separator: "/" */
060        public static final String DEFAULT_PATH_SEPARATOR = "/";
061    
062        private String pathSeparator = DEFAULT_PATH_SEPARATOR;
063    
064        /**
065         * Set the path separator to use for pattern parsing. Default is "/", as in
066         * Ant.
067         */
068        public void setPathSeparator(String pathSeparator) {
069            this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
070        }
071    
072        public boolean isPattern(String path) {
073            return path.indexOf('*') != -1 || path.indexOf('?') != -1;
074        }
075    
076        public boolean match(String pattern, String path) {
077            return doMatch(pattern, path, true);
078        }
079    
080        public boolean matchStart(String pattern, String path) {
081            return doMatch(pattern, path, false);
082        }
083    
084        /**
085         * Actually match the given <code>path</code> against the given
086         * <code>pattern</code>.
087         * 
088         * @param pattern the pattern to match against
089         * @param path the path String to test
090         * @param fullMatch whether a full pattern match is required (else a pattern
091         *            match as far as the given base path goes is sufficient)
092         * @return <code>true</code> if the supplied <code>path</code> matched,
093         *         <code>false</code> if it didn't
094         */
095        protected boolean doMatch(String pattern, String path, boolean fullMatch) {
096            if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
097                return false;
098            }
099    
100            String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
101            String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
102    
103            int pattIdxStart = 0;
104            int pattIdxEnd = pattDirs.length - 1;
105            int pathIdxStart = 0;
106            int pathIdxEnd = pathDirs.length - 1;
107    
108            // Match all elements up to the first **
109            while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
110                String patDir = pattDirs[pattIdxStart];
111                if ("**".equals(patDir)) {
112                    break;
113                }
114                if (!matchStrings(patDir, pathDirs[pathIdxStart])) {
115                    return false;
116                }
117                pattIdxStart++;
118                pathIdxStart++;
119            }
120    
121            if (pathIdxStart > pathIdxEnd) {
122                // Path is exhausted, only match if rest of pattern is * or **'s
123                if (pattIdxStart > pattIdxEnd) {
124                    return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
125                        .endsWith(this.pathSeparator);
126                }
127                if (!fullMatch) {
128                    return true;
129                }
130                if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
131                    && path.endsWith(this.pathSeparator)) {
132                    return true;
133                }
134                for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
135                    if (!pattDirs[i].equals("**")) {
136                        return false;
137                    }
138                }
139                return true;
140            } else if (pattIdxStart > pattIdxEnd) {
141                // String not exhausted, but pattern is. Failure.
142                return false;
143            } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
144                // Path start definitely matches due to "**" part in pattern.
145                return true;
146            }
147    
148            // up to last '**'
149            while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
150                String patDir = pattDirs[pattIdxEnd];
151                if (patDir.equals("**")) {
152                    break;
153                }
154                if (!matchStrings(patDir, pathDirs[pathIdxEnd])) {
155                    return false;
156                }
157                pattIdxEnd--;
158                pathIdxEnd--;
159            }
160            if (pathIdxStart > pathIdxEnd) {
161                // String is exhausted
162                for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
163                    if (!pattDirs[i].equals("**")) {
164                        return false;
165                    }
166                }
167                return true;
168            }
169    
170            while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
171                int patIdxTmp = -1;
172                for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
173                    if (pattDirs[i].equals("**")) {
174                        patIdxTmp = i;
175                        break;
176                    }
177                }
178                if (patIdxTmp == pattIdxStart + 1) {
179                    // '**/**' situation, so skip one
180                    pattIdxStart++;
181                    continue;
182                }
183                // Find the pattern between padIdxStart & padIdxTmp in str between
184                // strIdxStart & strIdxEnd
185                int patLength = patIdxTmp - pattIdxStart - 1;
186                int strLength = pathIdxEnd - pathIdxStart + 1;
187                int foundIdx = -1;
188    
189            strLoop:
190                for (int i = 0; i <= strLength - patLength; i++) {
191                    for (int j = 0; j < patLength; j++) {
192                        String subPat = pattDirs[pattIdxStart + j + 1];
193                        String subStr = pathDirs[pathIdxStart + i + j];
194                        if (!matchStrings(subPat, subStr)) {
195                            continue strLoop;
196                        }
197                    }
198                    foundIdx = pathIdxStart + i;
199                    break;
200                }
201    
202                if (foundIdx == -1) {
203                    return false;
204                }
205    
206                pattIdxStart = patIdxTmp;
207                pathIdxStart = foundIdx + patLength;
208            }
209    
210            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
211                if (!pattDirs[i].equals("**")) {
212                    return false;
213                }
214            }
215    
216            return true;
217        }
218    
219        /**
220         * Tests whether or not a string matches against a pattern. The pattern may
221         * contain two special characters:<br>
222         * '*' means zero or more characters<br>
223         * '?' means one and only one character
224         * 
225         * @param pattern pattern to match against. Must not be <code>null</code>.
226         * @param str string which must be matched against the pattern. Must not be
227         *            <code>null</code>.
228         * @return <code>true</code> if the string matches against the pattern, or
229         *         <code>false</code> otherwise.
230         */
231        private boolean matchStrings(String pattern, String str) {
232            char[] patArr = pattern.toCharArray();
233            char[] strArr = str.toCharArray();
234            int patIdxStart = 0;
235            int patIdxEnd = patArr.length - 1;
236            int strIdxStart = 0;
237            int strIdxEnd = strArr.length - 1;
238            char ch;
239    
240            boolean containsStar = false;
241            for (char c : patArr) {
242                if (c == '*') {
243                    containsStar = true;
244                    break;
245                }
246            }
247    
248            if (!containsStar) {
249                // No '*'s, so we make a shortcut
250                if (patIdxEnd != strIdxEnd) {
251                    return false; // Pattern and string do not have the same size
252                }
253                for (int i = 0; i <= patIdxEnd; i++) {
254                    ch = patArr[i];
255                    if (ch != '?') {
256                        if (ch != strArr[i]) {
257                            return false;
258                            // Character mismatch
259                        }
260                    }
261                }
262                return true; // String matches against pattern
263            }
264    
265            if (patIdxEnd == 0) {
266                return true; // Pattern contains only '*', which matches anything
267            }
268    
269            // Process characters before first star
270            while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
271                if (ch != '?') {
272                    if (ch != strArr[strIdxStart]) {
273                        return false;
274                        // Character mismatch
275                    }
276                }
277                patIdxStart++;
278                strIdxStart++;
279            }
280            if (strIdxStart > strIdxEnd) {
281                // All characters in the string are used. Check if only '*'s are
282                // left in the pattern. If so, we succeeded. Otherwise failure.
283                for (int i = patIdxStart; i <= patIdxEnd; i++) {
284                    if (patArr[i] != '*') {
285                        return false;
286                    }
287                }
288                return true;
289            }
290    
291            // Process characters after last star
292            while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
293                if (ch != '?') {
294                    if (ch != strArr[strIdxEnd]) {
295                        return false;
296                        // Character mismatch
297                    }
298                }
299                patIdxEnd--;
300                strIdxEnd--;
301            }
302            if (strIdxStart > strIdxEnd) {
303                // All characters in the string are used. Check if only '*'s are
304                // left in the pattern. If so, we succeeded. Otherwise failure.
305                for (int i = patIdxStart; i <= patIdxEnd; i++) {
306                    if (patArr[i] != '*') {
307                        return false;
308                    }
309                }
310                return true;
311            }
312    
313            // process pattern between stars. padIdxStart and patIdxEnd point
314            // always to a '*'.
315            while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
316                int patIdxTmp = -1;
317                for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
318                    if (patArr[i] == '*') {
319                        patIdxTmp = i;
320                        break;
321                    }
322                }
323                if (patIdxTmp == patIdxStart + 1) {
324                    // Two stars next to each other, skip the first one.
325                    patIdxStart++;
326                    continue;
327                }
328                // Find the pattern between padIdxStart & padIdxTmp in str between
329                // strIdxStart & strIdxEnd
330                int patLength = patIdxTmp - patIdxStart - 1;
331                int strLength = strIdxEnd - strIdxStart + 1;
332                int foundIdx = -1;
333            strLoop: 
334                for (int i = 0; i <= strLength - patLength; i++) {
335                    for (int j = 0; j < patLength; j++) {
336                        ch = patArr[patIdxStart + j + 1];
337                        if (ch != '?') {
338                            if (ch != strArr[strIdxStart + i + j]) {
339                                continue strLoop;
340                            }
341                        }
342                    }
343    
344                    foundIdx = strIdxStart + i;
345                    break;
346                }
347    
348                if (foundIdx == -1) {
349                    return false;
350                }
351    
352                patIdxStart = patIdxTmp;
353                strIdxStart = foundIdx + patLength;
354            }
355    
356            // All characters in the string are used. Check if only '*'s are left
357            // in the pattern. If so, we succeeded. Otherwise failure.
358            for (int i = patIdxStart; i <= patIdxEnd; i++) {
359                if (patArr[i] != '*') {
360                    return false;
361                }
362            }
363    
364            return true;
365        }
366    
367        /**
368         * Given a pattern and a full path, determine the pattern-mapped part.
369         * <p>
370         * For example:
371         * <ul>
372         * <li>'<code>/docs/cvs/commit.html</code>' and '
373         * <code>/docs/cvs/commit.html</code> -> ''</li>
374         * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -> '
375         * <code>cvs/commit</code>'</li>
376         * <li>'<code>/docs/cvs/*.html</code>' and '
377         * <code>/docs/cvs/commit.html</code> -> '<code>commit.html</code>'</li>
378         * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -> '
379         * <code>cvs/commit</code>'</li>
380         * <li>'<code>/docs/**\/*.html</code>' and '
381         * <code>/docs/cvs/commit.html</code> -> '<code>cvs/commit.html</code>'</li>
382         * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
383         * <code>docs/cvs/commit.html</code>'</li>
384         * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
385         * <code>/docs/cvs/commit.html</code>'</li>
386         * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -> '
387         * <code>/docs/cvs/commit.html</code>'</li>
388         * </ul>
389         * <p>
390         * Assumes that {@link #match} returns <code>true</code> for '
391         * <code>pattern</code>' and '<code>path</code>', but does
392         * <strong>not</strong> enforce this.
393         */
394        public String extractPathWithinPattern(String pattern, String path) {
395            String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
396            String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
397    
398            StringBuffer buffer = new StringBuffer();
399    
400            // Add any path parts that have a wildcarded pattern part.
401            int puts = 0;
402            for (int i = 0; i < patternParts.length; i++) {
403                String patternPart = patternParts[i];
404                if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
405                    if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
406                        buffer.append(this.pathSeparator);
407                    }
408                    buffer.append(pathParts[i]);
409                    puts++;
410                }
411            }
412    
413            // Append any trailing path parts.
414            for (int i = patternParts.length; i < pathParts.length; i++) {
415                if (puts > 0 || i > 0) {
416                    buffer.append(this.pathSeparator);
417                }
418                buffer.append(pathParts[i]);
419            }
420    
421            return buffer.toString();
422        }
423    
424        /**
425         * Tokenize the given String into a String array via a StringTokenizer.
426         * Trims tokens and omits empty tokens.
427         * <p>
428         * The given delimiters string is supposed to consist of any number of
429         * delimiter characters. Each of those characters can be used to separate
430         * tokens. A delimiter is always a single character; for multi-character
431         * delimiters, consider using <code>delimitedListToStringArray</code>
432         * 
433         * @param str the String to tokenize
434         * @param delimiters the delimiter characters, assembled as String (each of
435         *            those characters is individually considered as delimiter).
436         * @return an array of the tokens
437         * @see java.util.StringTokenizer
438         * @see java.lang.String#trim()
439         */
440        public static String[] tokenizeToStringArray(String str, String delimiters) {
441            if (str == null) {
442                return null;
443            }
444            StringTokenizer st = new StringTokenizer(str, delimiters);
445            List<String> tokens = new ArrayList<String>();
446            while (st.hasMoreTokens()) {
447                String token = st.nextToken();
448                token = token.trim();
449                if (token.length() > 0) {
450                    tokens.add(token);
451                }
452            }
453            return tokens.toArray(new String[tokens.size()]);
454        }
455    
456    }