001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.ar; 020 021 import java.io.EOFException; 022 import java.io.IOException; 023 import java.io.InputStream; 024 025 import org.apache.commons.compress.archivers.ArchiveEntry; 026 import org.apache.commons.compress.archivers.ArchiveInputStream; 027 import org.apache.commons.compress.utils.ArchiveUtils; 028 029 /** 030 * Implements the "ar" archive format as an input stream. 031 * 032 * @NotThreadSafe 033 * 034 */ 035 public class ArArchiveInputStream extends ArchiveInputStream { 036 037 private final InputStream input; 038 private long offset = 0; 039 private boolean closed; 040 041 /* 042 * If getNextEnxtry has been called, the entry metadata is stored in 043 * currentEntry. 044 */ 045 private ArArchiveEntry currentEntry = null; 046 047 // Storage area for extra long names (GNU ar) 048 private byte[] namebuffer = null; 049 050 /* 051 * The offset where the current entry started. -1 if no entry has been 052 * called 053 */ 054 private long entryOffset = -1; 055 056 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 057 private final byte[] NAME_BUF = new byte[16]; 058 private final byte[] LAST_MODIFIED_BUF = new byte[12]; 059 private final byte[] ID_BUF = new byte[6]; 060 private final byte[] FILE_MODE_BUF = new byte[8]; 061 private final byte[] LENGTH_BUF = new byte[10]; 062 063 /** 064 * Constructs an Ar input stream with the referenced stream 065 * 066 * @param pInput 067 * the ar input stream 068 */ 069 public ArArchiveInputStream(final InputStream pInput) { 070 input = pInput; 071 closed = false; 072 } 073 074 /** 075 * Returns the next AR entry in this stream. 076 * 077 * @return the next AR entry. 078 * @throws IOException 079 * if the entry could not be read 080 */ 081 public ArArchiveEntry getNextArEntry() throws IOException { 082 if (currentEntry != null) { 083 final long entryEnd = entryOffset + currentEntry.getLength(); 084 while (offset < entryEnd) { 085 int x = read(); 086 if (x == -1) { 087 // hit EOF before previous entry was complete 088 // TODO: throw an exception instead? 089 return null; 090 } 091 } 092 currentEntry = null; 093 } 094 095 if (offset == 0) { 096 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); 097 final byte[] realized = new byte[expected.length]; 098 final int read = read(realized); 099 if (read != expected.length) { 100 throw new IOException("failed to read header. Occured at byte: " + getBytesRead()); 101 } 102 for (int i = 0; i < expected.length; i++) { 103 if (expected[i] != realized[i]) { 104 throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized)); 105 } 106 } 107 } 108 109 if (offset % 2 != 0 && read() < 0) { 110 // hit eof 111 return null; 112 } 113 114 if (input.available() == 0) { 115 return null; 116 } 117 118 read(NAME_BUF); 119 read(LAST_MODIFIED_BUF); 120 read(ID_BUF); 121 int userId = asInt(ID_BUF, true); 122 read(ID_BUF); 123 read(FILE_MODE_BUF); 124 read(LENGTH_BUF); 125 126 { 127 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); 128 final byte[] realized = new byte[expected.length]; 129 final int read = read(realized); 130 if (read != expected.length) { 131 throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead()); 132 } 133 for (int i = 0; i < expected.length; i++) { 134 if (expected[i] != realized[i]) { 135 throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead()); 136 } 137 } 138 } 139 140 entryOffset = offset; 141 142 // GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename. 143 144 // entry name is stored as ASCII string 145 String temp = ArchiveUtils.toAsciiString(NAME_BUF).trim(); 146 if (isGNUStringTable(temp)) { // GNU extended filenames entry 147 currentEntry = readGNUStringTable(LENGTH_BUF); 148 return getNextArEntry(); 149 } 150 151 long len = asLong(LENGTH_BUF); 152 if (temp.endsWith("/")) { // GNU terminator 153 temp = temp.substring(0, temp.length() - 1); 154 } else if (isGNULongName(temp)) { 155 int offset = Integer.parseInt(temp.substring(1));// get the offset 156 temp = getExtendedName(offset); // convert to the long name 157 } else if (isBSDLongName(temp)) { 158 temp = getBSDLongName(temp); 159 // entry length contained the length of the file name in 160 // addition to the real length of the entry. 161 // assume file name was ASCII, there is no "standard" otherwise 162 int nameLen = temp.length(); 163 len -= nameLen; 164 entryOffset += nameLen; 165 } 166 167 currentEntry = new ArArchiveEntry(temp, len, userId, 168 asInt(ID_BUF, true), 169 asInt(FILE_MODE_BUF, 8), 170 asLong(LAST_MODIFIED_BUF)); 171 return currentEntry; 172 } 173 174 /** 175 * Get an extended name from the GNU extended name buffer. 176 * 177 * @param offset pointer to entry within the buffer 178 * @return the extended file name; without trailing "/" if present. 179 * @throws IOException if name not found or buffer not set up 180 */ 181 private String getExtendedName(int offset) throws IOException{ 182 if (namebuffer == null) { 183 throw new IOException("Cannot process GNU long filename as no // record was found"); 184 } 185 for(int i=offset; i < namebuffer.length; i++){ 186 if (namebuffer[i]=='\012'){ 187 if (namebuffer[i-1]=='/') { 188 i--; // drop trailing / 189 } 190 return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset); 191 } 192 } 193 throw new IOException("Failed to read entry: "+offset); 194 } 195 private long asLong(byte[] input) { 196 return Long.parseLong(ArchiveUtils.toAsciiString(input).trim()); 197 } 198 199 private int asInt(byte[] input) { 200 return asInt(input, 10, false); 201 } 202 203 private int asInt(byte[] input, boolean treatBlankAsZero) { 204 return asInt(input, 10, treatBlankAsZero); 205 } 206 207 private int asInt(byte[] input, int base) { 208 return asInt(input, base, false); 209 } 210 211 private int asInt(byte[] input, int base, boolean treatBlankAsZero) { 212 String string = ArchiveUtils.toAsciiString(input).trim(); 213 if (string.length() == 0 && treatBlankAsZero) { 214 return 0; 215 } 216 return Integer.parseInt(string, base); 217 } 218 219 /* 220 * (non-Javadoc) 221 * 222 * @see 223 * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry() 224 */ 225 @Override 226 public ArchiveEntry getNextEntry() throws IOException { 227 return getNextArEntry(); 228 } 229 230 /* 231 * (non-Javadoc) 232 * 233 * @see java.io.InputStream#close() 234 */ 235 @Override 236 public void close() throws IOException { 237 if (!closed) { 238 closed = true; 239 input.close(); 240 } 241 currentEntry = null; 242 } 243 244 /* 245 * (non-Javadoc) 246 * 247 * @see java.io.InputStream#read(byte[], int, int) 248 */ 249 @Override 250 public int read(byte[] b, final int off, final int len) throws IOException { 251 int toRead = len; 252 if (currentEntry != null) { 253 final long entryEnd = entryOffset + currentEntry.getLength(); 254 if (len > 0 && entryEnd > offset) { 255 toRead = (int) Math.min(len, entryEnd - offset); 256 } else { 257 return -1; 258 } 259 } 260 final int ret = this.input.read(b, off, toRead); 261 count(ret); 262 offset += (ret > 0 ? ret : 0); 263 return ret; 264 } 265 266 /** 267 * Checks if the signature matches ASCII "!<arch>" followed by a single LF 268 * control character 269 * 270 * @param signature 271 * the bytes to check 272 * @param length 273 * the number of bytes to check 274 * @return true, if this stream is an Ar archive stream, false otherwise 275 */ 276 public static boolean matches(byte[] signature, int length) { 277 // 3c21 7261 6863 0a3e 278 279 if (length < 8) { 280 return false; 281 } 282 if (signature[0] != 0x21) { 283 return false; 284 } 285 if (signature[1] != 0x3c) { 286 return false; 287 } 288 if (signature[2] != 0x61) { 289 return false; 290 } 291 if (signature[3] != 0x72) { 292 return false; 293 } 294 if (signature[4] != 0x63) { 295 return false; 296 } 297 if (signature[5] != 0x68) { 298 return false; 299 } 300 if (signature[6] != 0x3e) { 301 return false; 302 } 303 if (signature[7] != 0x0a) { 304 return false; 305 } 306 307 return true; 308 } 309 310 static final String BSD_LONGNAME_PREFIX = "#1/"; 311 private static final int BSD_LONGNAME_PREFIX_LEN = 312 BSD_LONGNAME_PREFIX.length(); 313 private static final String BSD_LONGNAME_PATTERN = 314 "^" + BSD_LONGNAME_PREFIX + "\\d+"; 315 316 /** 317 * Does the name look like it is a long name (or a name containing 318 * spaces) as encoded by BSD ar? 319 * 320 * <p>From the FreeBSD ar(5) man page:</p> 321 * <pre> 322 * BSD In the BSD variant, names that are shorter than 16 323 * characters and without embedded spaces are stored 324 * directly in this field. If a name has an embedded 325 * space, or if it is longer than 16 characters, then 326 * the string "#1/" followed by the decimal represen- 327 * tation of the length of the file name is placed in 328 * this field. The actual file name is stored immedi- 329 * ately after the archive header. The content of the 330 * archive member follows the file name. The ar_size 331 * field of the header (see below) will then hold the 332 * sum of the size of the file name and the size of 333 * the member. 334 * </pre> 335 * 336 * @since 1.3 337 */ 338 private static boolean isBSDLongName(String name) { 339 return name != null && name.matches(BSD_LONGNAME_PATTERN); 340 } 341 342 /** 343 * Reads the real name from the current stream assuming the very 344 * first bytes to be read are the real file name. 345 * 346 * @see #isBSDLongName 347 * 348 * @since 1.3 349 */ 350 private String getBSDLongName(String bsdLongName) throws IOException { 351 int nameLen = 352 Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); 353 byte[] name = new byte[nameLen]; 354 int read = 0, readNow = 0; 355 while ((readNow = input.read(name, read, nameLen - read)) >= 0) { 356 read += readNow; 357 count(readNow); 358 if (read == nameLen) { 359 break; 360 } 361 } 362 if (read != nameLen) { 363 throw new EOFException(); 364 } 365 return ArchiveUtils.toAsciiString(name); 366 } 367 368 private static final String GNU_STRING_TABLE_NAME = "//"; 369 370 /** 371 * Is this the name of the "Archive String Table" as used by 372 * SVR4/GNU to store long file names? 373 * 374 * <p>GNU ar stores multiple extended filenames in the data section 375 * of a file with the name "//", this record is referred to by 376 * future headers.</p> 377 * 378 * <p>A header references an extended filename by storing a "/" 379 * followed by a decimal offset to the start of the filename in 380 * the extended filename data section.</p> 381 * 382 * <p>The format of the "//" file itself is simply a list of the 383 * long filenames, each separated by one or more LF 384 * characters. Note that the decimal offsets are number of 385 * characters, not line or string number within the "//" file.</p> 386 */ 387 private static boolean isGNUStringTable(String name) { 388 return GNU_STRING_TABLE_NAME.equals(name); 389 } 390 391 /** 392 * Reads the GNU archive String Table. 393 * 394 * @see #isGNUStringTable 395 */ 396 private ArArchiveEntry readGNUStringTable(byte[] length) throws IOException { 397 int bufflen = asInt(length); // Assume length will fit in an int 398 namebuffer = new byte[bufflen]; 399 int read = read(namebuffer, 0, bufflen); 400 if (read != bufflen){ 401 throw new IOException("Failed to read complete // record: expected=" 402 + bufflen + " read=" + read); 403 } 404 return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); 405 } 406 407 private static final String GNU_LONGNAME_PATTERN = "^/\\d+"; 408 409 /** 410 * Does the name look like it is a long name (or a name containing 411 * spaces) as encoded by SVR4/GNU ar? 412 * 413 * @see #isGNUStringTable 414 */ 415 private boolean isGNULongName(String name) { 416 return name != null && name.matches(GNU_LONGNAME_PATTERN); 417 } 418 }