/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then to read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to be a hint on the encoding of string values
 * in any piece of documentation. Given that the main purpose of
 * dump/restore is backing up a system, it seems very likely the format
 * uses the current default encoding of the system.
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for filenames and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
095 * 096 * @param is stream to read from 097 * @param encoding the encoding to use for file names, use null 098 * for the platform's default encoding 099 * @since 1.6 100 * @throws ArchiveException on error 101 */ 102 public DumpArchiveInputStream(InputStream is, String encoding) 103 throws ArchiveException { 104 this.raw = new TapeInputStream(is); 105 this.hasHitEOF = false; 106 this.encoding = encoding; 107 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 108 109 try { 110 // read header, verify it's a dump archive. 111 byte[] headerBytes = raw.readRecord(); 112 113 if (!DumpArchiveUtil.verify(headerBytes)) { 114 throw new UnrecognizedFormatException(); 115 } 116 117 // get summary information 118 summary = new DumpArchiveSummary(headerBytes, this.zipEncoding); 119 120 // reset buffer with actual block size. 121 raw.resetBlockSize(summary.getNTRec(), summary.isCompressed()); 122 123 // allocate our read buffer. 124 blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE]; 125 126 // skip past CLRI and BITS segments since we don't handle them yet. 127 readCLRI(); 128 readBITS(); 129 } catch (IOException ex) { 130 throw new ArchiveException(ex.getMessage(), ex); 131 } 132 133 // put in a dummy record for the root node. 134 Dirent root = new Dirent(2, 2, 4, "."); 135 names.put(2, root); 136 137 // use priority based on queue to ensure parent directories are 138 // released first. 139 queue = new PriorityQueue<DumpArchiveEntry>(10, 140 new Comparator<DumpArchiveEntry>() { 141 public int compare(DumpArchiveEntry p, DumpArchiveEntry q) { 142 if (p.getOriginalName() == null || q.getOriginalName() == null) { 143 return Integer.MAX_VALUE; 144 } 145 146 return p.getOriginalName().compareTo(q.getOriginalName()); 147 } 148 }); 149 } 150 151 @Deprecated 152 @Override 153 public int getCount() { 154 return (int) getBytesRead(); 155 } 156 157 @Override 158 public long getBytesRead() { 159 return raw.getBytesRead(); 160 } 161 162 /** 163 * Return the archive summary information. 164 * @return the summary 165 */ 166 public DumpArchiveSummary getSummary() { 167 return summary; 168 } 169 170 /** 171 * Read CLRI (deleted inode) segment. 172 */ 173 private void readCLRI() throws IOException { 174 byte[] buffer = raw.readRecord(); 175 176 if (!DumpArchiveUtil.verify(buffer)) { 177 throw new InvalidFormatException(); 178 } 179 180 active = DumpArchiveEntry.parse(buffer); 181 182 if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) { 183 throw new InvalidFormatException(); 184 } 185 186 // we don't do anything with this yet. 187 if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) 188 == -1) { 189 throw new EOFException(); 190 } 191 readIdx = active.getHeaderCount(); 192 } 193 194 /** 195 * Read BITS segment. 196 */ 197 private void readBITS() throws IOException { 198 byte[] buffer = raw.readRecord(); 199 200 if (!DumpArchiveUtil.verify(buffer)) { 201 throw new InvalidFormatException(); 202 } 203 204 active = DumpArchiveEntry.parse(buffer); 205 206 if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) { 207 throw new InvalidFormatException(); 208 } 209 210 // we don't do anything with this yet. 211 if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) 212 == -1) { 213 throw new EOFException(); 214 } 215 readIdx = active.getHeaderCount(); 216 } 217 218 /** 219 * Read the next entry. 

    /**
     * Read the next entry.
     * @return the next entry
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip(DumpArchiveConstants.TP_SIZE
                        * (active.getHeaderCount() - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }
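
    // Each directory data block handled by readDirectoryEntry() below is a
    // sequence of dirent records with the following layout, as parsed in its
    // inner loop:
    //   bytes 0-3  inode number (convert32)
    //   bytes 4-5  record length, used to advance to the next record (convert16)
    //   byte  6    file type
    //   byte  7    name length
    //   bytes 8+   name, decoded with zipEncoding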

    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8; i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                byte type = blockBuffer[i + 6];

                String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                /*
                if ((type == 4) && names.containsKey(ino)) {
                    System.out.println("we already have ino: " + names.get(ino));
                }
                */

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue().setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(e.getIno());
                }
            }

            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the entry to find the path for
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value and if so we defer
        // the entry until the missing directory information shows up.
        Stack<String> elements = new Stack<String>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            int sz = len > readBuf.length - recordOffset
                ? readBuf.length - recordOffset : len;

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                if (readIdx >= 512) {
                    byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
    }
}
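
// A minimal usage sketch, not part of the original class, showing how the
// stream is typically driven: position at each successive entry with
// getNextEntry() and read the entry data with read(). The archive file name
// "backup.dump" and the byte counting are assumptions made for illustration
// only.
class DumpArchiveInputStreamExample {
    public static void main(String[] args) throws Exception {
        java.io.InputStream is = new java.io.FileInputStream("backup.dump"); // assumed path
        DumpArchiveInputStream dump = new DumpArchiveInputStream(is);

        try {
            DumpArchiveEntry entry;
            byte[] buffer = new byte[1024];

            while ((entry = dump.getNextEntry()) != null) {
                long size = 0;

                if (!entry.isDirectory()) {
                    // read() stops at the boundary of the current entry and returns -1.
                    int n;
                    while ((n = dump.read(buffer, 0, buffer.length)) != -1) {
                        size += n;
                    }
                }

                System.out.println(entry.getName() + " (" + size + " bytes)");
            }
        } finally {
            dump.close();
        }
    }
}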