001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
023
024import java.io.IOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import org.apache.commons.compress.archivers.zip.ZipEncoding;
028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029
030/**
031 * This class provides static utility methods to work with byte streams.
032 *
033 * @Immutable
034 */
035// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
036public class TarUtils {
037
038    private static final int BYTE_MASK = 255;
039
040    static final ZipEncoding DEFAULT_ENCODING =
041        ZipEncodingHelper.getZipEncoding(null);
042
043    /**
044     * Encapsulates the algorithms used up to Commons Compress 1.3 as
045     * ZipEncoding.
046     */
047    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
048            public boolean canEncode(String name) { return true; }
049
050            public ByteBuffer encode(String name) {
051                final int length = name.length();
052                byte[] buf = new byte[length];
053
054                // copy until end of input or output is reached.
055                for (int i = 0; i < length; ++i) {
056                    buf[i] = (byte) name.charAt(i);
057                }
058                return ByteBuffer.wrap(buf);
059            }
060
061            public String decode(byte[] buffer) {
062                final int length = buffer.length;
063                StringBuilder result = new StringBuilder(length);
064
065                for (byte b : buffer) {
066                    if (b == 0) { // Trailing null
067                        break;
068                    }
069                    result.append((char) (b & 0xFF)); // Allow for sign-extension
070                }
071
072                return result.toString();
073            }
074        };
075
076    /** Private constructor to prevent instantiation of this utility class. */
077    private TarUtils(){
078    }
079
080    /**
081     * Parse an octal string from a buffer.
082     *
083     * <p>Leading spaces are ignored.
084     * The buffer must contain a trailing space or NUL,
085     * and may contain an additional trailing space or NUL.</p>
086     *
087     * <p>The input buffer is allowed to contain all NULs,
088     * in which case the method returns 0L
089     * (this allows for missing fields).</p>
090     *
091     * <p>To work-around some tar implementations that insert a
092     * leading NUL this method returns 0 if it detects a leading NUL
093     * since Commons Compress 1.4.</p>
094     *
095     * @param buffer The buffer from which to parse.
096     * @param offset The offset into the buffer from which to parse.
097     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
098     * @return The long value of the octal string.
099     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
100     */
101    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
102        long    result = 0;
103        int     end = offset + length;
104        int     start = offset;
105
106        if (length < 2){
107            throw new IllegalArgumentException("Length "+length+" must be at least 2");
108        }
109
110        if (buffer[start] == 0) {
111            return 0L;
112        }
113
114        // Skip leading spaces
115        while (start < end){
116            if (buffer[start] == ' '){
117                start++;
118            } else {
119                break;
120            }
121        }
122
123        // Trim all trailing NULs and spaces.
124        // The ustar and POSIX tar specs require a trailing NUL or
125        // space but some implementations use the extra digit for big
126        // sizes/uids/gids ...
127        byte trailer = buffer[end - 1];
128        while (start < end && (trailer == 0 || trailer == ' ')) {
129            end--;
130            trailer = buffer[end - 1];
131        }
132
133        for ( ;start < end; start++) {
134            final byte currentByte = buffer[start];
135            // CheckStyle:MagicNumber OFF
136            if (currentByte < '0' || currentByte > '7'){
137                throw new IllegalArgumentException(
138                        exceptionMessage(buffer, offset, length, start, currentByte));
139            }
140            result = (result << 3) + (currentByte - '0'); // convert from ASCII
141            // CheckStyle:MagicNumber ON
142        }
143
144        return result;
145    }
146
147    /** 
148     * Compute the value contained in a byte buffer.  If the most
149     * significant bit of the first byte in the buffer is set, this
150     * bit is ignored and the rest of the buffer is interpreted as a
151     * binary number.  Otherwise, the buffer is interpreted as an
152     * octal number as per the parseOctal function above.
153     *
154     * @param buffer The buffer from which to parse.
155     * @param offset The offset into the buffer from which to parse.
156     * @param length The maximum number of bytes to parse.
157     * @return The long value of the octal or binary string.
158     * @throws IllegalArgumentException if the trailing space/NUL is
159     * missing or an invalid byte is detected in an octal number, or
160     * if a binary number would exceed the size of a signed long
161     * 64-bit integer.
162     * @since 1.4
163     */
164    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
165                                          final int length) {
166
167        if ((buffer[offset] & 0x80) == 0) {
168            return parseOctal(buffer, offset, length);
169        }
170        final boolean negative = buffer[offset] == (byte) 0xff;
171        if (length < 9) {
172            return parseBinaryLong(buffer, offset, length, negative);
173        }
174        return parseBinaryBigInteger(buffer, offset, length, negative);
175    }
176
177    private static long parseBinaryLong(final byte[] buffer, final int offset,
178                                        final int length,
179                                        final boolean negative) {
180        if (length >= 9) {
181            throw new IllegalArgumentException("At offset " + offset + ", "
182                                               + length + " byte binary number"
183                                               + " exceeds maximum signed long"
184                                               + " value");
185        }
186        long val = 0;
187        for (int i = 1; i < length; i++) {
188            val = (val << 8) + (buffer[offset + i] & 0xff);
189        }
190        if (negative) {
191            // 2's complement
192            val--;
193            val ^= (long) Math.pow(2, (length - 1) * 8) - 1;
194        }
195        return negative ? -val : val;
196    }
197
198    private static long parseBinaryBigInteger(final byte[] buffer,
199                                              final int offset,
200                                              final int length,
201                                              final boolean negative) {
202        byte[] remainder = new byte[length - 1];
203        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
204        BigInteger val = new BigInteger(remainder);
205        if (negative) {
206            // 2's complement
207            val = val.add(BigInteger.valueOf(-1)).not();
208        }
209        if (val.bitLength() > 63) {
210            throw new IllegalArgumentException("At offset " + offset + ", "
211                                               + length + " byte binary number"
212                                               + " exceeds maximum signed long"
213                                               + " value");
214        }
215        return negative ? -val.longValue() : val.longValue();
216    }
217
218    /**
219     * Parse a boolean byte from a buffer.
220     * Leading spaces and NUL are ignored.
221     * The buffer may contain trailing spaces or NULs.
222     *
223     * @param buffer The buffer from which to parse.
224     * @param offset The offset into the buffer from which to parse.
225     * @return The boolean value of the bytes.
226     * @throws IllegalArgumentException if an invalid byte is detected.
227     */
228    public static boolean parseBoolean(final byte[] buffer, final int offset) {
229        return buffer[offset] == 1;
230    }
231
232    // Helper method to generate the exception message
233    private static String exceptionMessage(byte[] buffer, final int offset,
234            final int length, int current, final byte currentByte) {
235        // default charset is good enough for an exception message,
236        //
237        // the alternative was to modify parseOctal and
238        // parseOctalOrBinary to receive the ZipEncoding of the
239        // archive (deprecating the existing public methods, of
240        // course) and dealing with the fact that ZipEncoding#decode
241        // can throw an IOException which parseOctal* doesn't declare
242        String string = new String(buffer, offset, length);
243
244        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
245        final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
246        return s;
247    }
248
249    /**
250     * Parse an entry name from a buffer.
251     * Parsing stops when a NUL is found
252     * or the buffer length is reached.
253     *
254     * @param buffer The buffer from which to parse.
255     * @param offset The offset into the buffer from which to parse.
256     * @param length The maximum number of bytes to parse.
257     * @return The entry name.
258     */
259    public static String parseName(byte[] buffer, final int offset, final int length) {
260        try {
261            return parseName(buffer, offset, length, DEFAULT_ENCODING);
262        } catch (IOException ex) {
263            try {
264                return parseName(buffer, offset, length, FALLBACK_ENCODING);
265            } catch (IOException ex2) {
266                // impossible
267                throw new RuntimeException(ex2);
268            }
269        }
270    }
271
272    /**
273     * Parse an entry name from a buffer.
274     * Parsing stops when a NUL is found
275     * or the buffer length is reached.
276     *
277     * @param buffer The buffer from which to parse.
278     * @param offset The offset into the buffer from which to parse.
279     * @param length The maximum number of bytes to parse.
280     * @param encoding name of the encoding to use for file names
281     * @since 1.4
282     * @return The entry name.
283     * @throws IOException on error
284     */
285    public static String parseName(byte[] buffer, final int offset,
286                                   final int length,
287                                   final ZipEncoding encoding)
288        throws IOException {
289
290        int len = length;
291        for (; len > 0; len--) {
292            if (buffer[offset + len - 1] != 0) {
293                break;
294            }
295        }
296        if (len > 0) {
297            byte[] b = new byte[len];
298            System.arraycopy(buffer, offset, b, 0, len);
299            return encoding.decode(b);
300        }
301        return "";
302    }
303
304    /**
305     * Copy a name into a buffer.
306     * Copies characters from the name into the buffer
307     * starting at the specified offset. 
308     * If the buffer is longer than the name, the buffer
309     * is filled with trailing NULs.
310     * If the name is longer than the buffer,
311     * the output is truncated.
312     *
313     * @param name The header name from which to copy the characters.
314     * @param buf The buffer where the name is to be stored.
315     * @param offset The starting offset into the buffer
316     * @param length The maximum number of header bytes to copy.
317     * @return The updated offset, i.e. offset + length
318     */
319    public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
320        try {
321            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
322        } catch (IOException ex) {
323            try {
324                return formatNameBytes(name, buf, offset, length,
325                                       FALLBACK_ENCODING);
326            } catch (IOException ex2) {
327                // impossible
328                throw new RuntimeException(ex2);
329            }
330        }
331    }
332
333    /**
334     * Copy a name into a buffer.
335     * Copies characters from the name into the buffer
336     * starting at the specified offset. 
337     * If the buffer is longer than the name, the buffer
338     * is filled with trailing NULs.
339     * If the name is longer than the buffer,
340     * the output is truncated.
341     *
342     * @param name The header name from which to copy the characters.
343     * @param buf The buffer where the name is to be stored.
344     * @param offset The starting offset into the buffer
345     * @param length The maximum number of header bytes to copy.
346     * @param encoding name of the encoding to use for file names
347     * @since 1.4
348     * @return The updated offset, i.e. offset + length
349     * @throws IOException on error
350     */
351    public static int formatNameBytes(String name, byte[] buf, final int offset,
352                                      final int length,
353                                      final ZipEncoding encoding)
354        throws IOException {
355        int len = name.length();
356        ByteBuffer b = encoding.encode(name);
357        while (b.limit() > length && len > 0) {
358            b = encoding.encode(name.substring(0, --len));
359        }
360        final int limit = b.limit() - b.position();
361        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
362
363        // Pad any remaining output bytes with NUL
364        for (int i = limit; i < length; ++i) {
365            buf[offset + i] = 0;
366        }
367
368        return offset + length;
369    }
370
371    /**
372     * Fill buffer with unsigned octal number, padded with leading zeroes.
373     * 
374     * @param value number to convert to octal - treated as unsigned
375     * @param buffer destination buffer
376     * @param offset starting offset in buffer
377     * @param length length of buffer to fill
378     * @throws IllegalArgumentException if the value will not fit in the buffer
379     */
380    public static void formatUnsignedOctalString(final long value, byte[] buffer,
381            final int offset, final int length) {
382        int remaining = length;
383        remaining--;
384        if (value == 0) {
385            buffer[offset + remaining--] = (byte) '0';
386        } else {
387            long val = value;
388            for (; remaining >= 0 && val != 0; --remaining) {
389                // CheckStyle:MagicNumber OFF
390                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
391                val = val >>> 3;
392                // CheckStyle:MagicNumber ON
393            }
394            if (val != 0){
395                throw new IllegalArgumentException
396                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
397            }
398        }
399
400        for (; remaining >= 0; --remaining) { // leading zeros
401            buffer[offset + remaining] = (byte) '0';
402        }
403    }
404
405    /**
406     * Write an octal integer into a buffer.
407     *
408     * Uses {@link #formatUnsignedOctalString} to format
409     * the value as an octal string with leading zeros.
410     * The converted number is followed by space and NUL
411     * 
412     * @param value The value to write
413     * @param buf The buffer to receive the output
414     * @param offset The starting offset into the buffer
415     * @param length The size of the output buffer
416     * @return The updated offset, i.e offset+length
417     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
418     */
419    public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) {
420
421        int idx=length-2; // For space and trailing null
422        formatUnsignedOctalString(value, buf, offset, idx);
423
424        buf[offset + idx++] = (byte) ' '; // Trailing space
425        buf[offset + idx]   = 0; // Trailing null
426
427        return offset + length;
428    }
429
430    /**
431     * Write an octal long integer into a buffer.
432     * 
433     * Uses {@link #formatUnsignedOctalString} to format
434     * the value as an octal string with leading zeros.
435     * The converted number is followed by a space.
436     * 
437     * @param value The value to write as octal
438     * @param buf The destinationbuffer.
439     * @param offset The starting offset into the buffer.
440     * @param length The length of the buffer
441     * @return The updated offset
442     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
443     */
444    public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
445
446        int idx=length-1; // For space
447
448        formatUnsignedOctalString(value, buf, offset, idx);
449        buf[offset + idx] = (byte) ' '; // Trailing space
450
451        return offset + length;
452    }
453
454    /**
455     * Write an long integer into a buffer as an octal string if this
456     * will fit, or as a binary number otherwise.
457     * 
458     * Uses {@link #formatUnsignedOctalString} to format
459     * the value as an octal string with leading zeros.
460     * The converted number is followed by a space.
461     * 
462     * @param value The value to write into the buffer.
463     * @param buf The destination buffer.
464     * @param offset The starting offset into the buffer.
465     * @param length The length of the buffer.
466     * @return The updated offset.
467     * @throws IllegalArgumentException if the value (and trailer)
468     * will not fit in the buffer.
469     * @since 1.4
470     */
471    public static int formatLongOctalOrBinaryBytes(
472        final long value, byte[] buf, final int offset, final int length) {
473
474        // Check whether we are dealing with UID/GID or SIZE field
475        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
476
477        final boolean negative = value < 0;
478        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
479            return formatLongOctalBytes(value, buf, offset, length);
480        }
481
482        if (length < 9) {
483            formatLongBinary(value, buf, offset, length, negative);
484        }
485        formatBigIntegerBinary(value, buf, offset, length, negative);
486
487        buf[offset] = (byte) (negative ? 0xff : 0x80);
488        return offset + length;
489    }
490
491    private static void formatLongBinary(final long value, byte[] buf,
492                                         final int offset, final int length,
493                                         final boolean negative) {
494        final int bits = (length - 1) * 8;
495        final long max = 1l << bits;
496        long val = Math.abs(value);
497        if (val >= max) {
498            throw new IllegalArgumentException("Value " + value +
499                " is too large for " + length + " byte field.");
500        }
501        if (negative) {
502            val ^= max - 1;
503            val |= 0xff << bits;
504            val++;
505        }
506        for (int i = offset + length - 1; i >= offset; i--) {
507            buf[i] = (byte) val;
508            val >>= 8;
509        }
510    }
511
512    private static void formatBigIntegerBinary(final long value, byte[] buf,
513                                               final int offset,
514                                               final int length,
515                                               final boolean negative) {
516        BigInteger val = BigInteger.valueOf(value);
517        final byte[] b = val.toByteArray();
518        final int len = b.length;
519        final int off = offset + length - len;
520        System.arraycopy(b, 0, buf, off, len);
521        final byte fill = (byte) (negative ? 0xff : 0);
522        for (int i = offset + 1; i < off; i++) {
523            buf[i] = fill;
524        }
525    }
526
527    /**
528     * Writes an octal value into a buffer.
529     * 
530     * Uses {@link #formatUnsignedOctalString} to format
531     * the value as an octal string with leading zeros.
532     * The converted number is followed by NUL and then space.
533     *
534     * @param value The value to convert
535     * @param buf The destination buffer
536     * @param offset The starting offset into the buffer.
537     * @param length The size of the buffer.
538     * @return The updated value of offset, i.e. offset+length
539     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
540     */
541    public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
542
543        int idx=length-2; // for NUL and space
544        formatUnsignedOctalString(value, buf, offset, idx);
545
546        buf[offset + idx++]   = 0; // Trailing null
547        buf[offset + idx]     = (byte) ' '; // Trailing space
548
549        return offset + length;
550    }
551
552    /**
553     * Compute the checksum of a tar entry header.
554     *
555     * @param buf The tar entry's header buffer.
556     * @return The computed checksum.
557     */
558    public static long computeCheckSum(final byte[] buf) {
559        long sum = 0;
560
561        for (byte element : buf) {
562            sum += BYTE_MASK & element;
563        }
564
565        return sum;
566    }
567
568    /**
569     * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
570     * <blockquote>
571     * The checksum is calculated by taking the sum of the unsigned byte values
572     * of the header block with the eight checksum bytes taken to be ascii
573     * spaces (decimal value 32). It is stored as a six digit octal number with
574     * leading zeroes followed by a NUL and then a space. Various
575     * implementations do not adhere to this format. For better compatibility,
576     * ignore leading and trailing whitespace, and get the first six digits. In
577     * addition, some historic tar implementations treated bytes as signed.
578     * Implementations typically calculate the checksum both ways, and treat it
579     * as good if either the signed or unsigned sum matches the included
580     * checksum.
581     * </blockquote>
582     * <p>
583     * In addition there are
584     * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a>
585     * that seem to have parts of their header cleared to zero (no detectable
586     * magic bytes, etc.) but still have a reasonable-looking checksum field
587     * present. It looks like we can detect such cases reasonably well by
588     * checking whether the stored checksum is <em>greater than</em> the
589     * computed unsigned checksum. That check is unlikely to pass on some
590     * random file header, as it would need to have a valid sequence of
591     * octal digits in just the right place.
592     * <p>
593     * The return value of this method should be treated as a best-effort
594     * heuristic rather than an absolute and final truth. The checksum
595     * verification logic may well evolve over time as more special cases
596     * are encountered.
597     *
598     * @param header tar header
599     * @return whether the checksum is reasonably good
600     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
601     * @since 1.5
602     */
603    public static boolean verifyCheckSum(byte[] header) {
604        long storedSum = 0;
605        long unsignedSum = 0;
606        long signedSum = 0;
607
608        int digits = 0;
609        for (int i = 0; i < header.length; i++) {
610            byte b = header[i];
611            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
612                if ('0' <= b && b <= '7' && digits++ < 6) {
613                    storedSum = storedSum * 8 + b - '0';
614                } else if (digits > 0) {
615                    digits = 6; // only look at the first octal digit sequence
616                }
617                b = ' ';
618            }
619            unsignedSum += 0xff & b;
620            signedSum += b;
621        }
622
623        return storedSum == unsignedSum || storedSum == signedSum
624                || storedSum > unsignedSum; // COMPRESS-177
625    }
626
627}