NameConflictTreeWalk.java

  1. /*
  2.  * Copyright (C) 2008, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.treewalk;

  11. import java.io.IOException;

  12. import org.eclipse.jgit.annotations.Nullable;
  13. import org.eclipse.jgit.errors.CorruptObjectException;
  14. import org.eclipse.jgit.lib.FileMode;
  15. import org.eclipse.jgit.lib.ObjectReader;
  16. import org.eclipse.jgit.lib.Repository;

  17. /**
  18.  * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
  19.  * <p>
  20.  * Due to the way a Git tree is organized the standard
  21.  * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
  22.  * when merging two or more trees together. In the standard TreeWalk the file
  23.  * will be returned first, and then much later the directory will be returned.
  24.  * This makes it impossible for the application to efficiently detect and handle
  25.  * the conflict.
  26.  * <p>
  27.  * Using this walk implementation causes the directory to report earlier than
  28.  * usual, at the same time as the non-directory entry. This permits the
  29.  * application to handle the D/F conflict in a single step. The directory is
  30.  * returned only once, so it does not get returned later in the iteration.
  31.  * <p>
  32.  * When a D/F conflict is detected
  33.  * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
  34.  * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
  35.  * the subtree, no matter which iterator originally supplied the subtree.
  36.  * <p>
  37.  * Because conflicted directories report early, using this walk implementation
  38.  * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
  39.  * automatic resorting to run and fix the entry ordering.
  40.  * <p>
  41.  * This walk implementation requires more CPU to implement a look-ahead and a
  42.  * look-behind to merge a D/F pair together, or to skip a previously reported
  43.  * directory. In typical Git repositories the look-ahead cost is 0 and the
  44.  * look-behind doesn't trigger, as users tend not to create trees which contain
  45.  * both "foo" as a directory and "foo.c" as a file.
  46.  * <p>
  47.  * In the worst-case however several thousand look-ahead steps per walk step may
  48.  * be necessary, making the overhead quite significant. Since this worst-case
  49.  * should never happen this walk implementation has made the time/space tradeoff
  50.  * in favor of more-time/less-space, as that better suits the typical case.
  51.  */
  52. public class NameConflictTreeWalk extends TreeWalk {
  53.     private static final int TREE_MODE = FileMode.TREE.getBits();

  54.     /**
  55.      * True if all {@link #trees} point to entries with equal names.
  56.      *
  57.      * If at least one tree iterator point to a different name or
  58.      * reached end of the tree, the value is false.
  59.      * Note: if all iterators reached end of trees, the value is true.
  60.      */
  61.     private boolean allTreesNamesMatchFastMinRef;

  62.     private AbstractTreeIterator dfConflict;

  63.     /**
  64.      * Create a new tree walker for a given repository.
  65.      *
  66.      * @param repo
  67.      *            the repository the walker will obtain data from.
  68.      */
  69.     public NameConflictTreeWalk(Repository repo) {
  70.         super(repo);
  71.     }

  72.     /**
  73.      * Create a new tree walker for a given repository.
  74.      *
  75.      * @param repo
  76.      *            the repository the walker will obtain data from.
  77.      * @param or
  78.      *            the reader the walker will obtain tree data from.
  79.      * @since 4.3
  80.      */
  81.     public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) {
  82.         super(repo, or);
  83.     }

  84.     /**
  85.      * Create a new tree walker for a given repository.
  86.      *
  87.      * @param or
  88.      *            the reader the walker will obtain tree data from.
  89.      */
  90.     public NameConflictTreeWalk(ObjectReader or) {
  91.         super(or);
  92.     }

  93.     @Override
  94.     AbstractTreeIterator min() throws CorruptObjectException {
  95.         for (;;) {
  96.             final AbstractTreeIterator minRef = fastMin();
  97.             if (allTreesNamesMatchFastMinRef)
  98.                 return minRef;

  99.             if (isTree(minRef)) {
  100.                 if (skipEntry(minRef)) {
  101.                     for (AbstractTreeIterator t : trees) {
  102.                         if (t.matches == minRef) {
  103.                             t.next(1);
  104.                             t.matches = null;
  105.                         }
  106.                     }
  107.                     continue;
  108.                 }
  109.                 return minRef;
  110.             }

  111.             return combineDF(minRef);
  112.         }
  113.     }

  114.     private AbstractTreeIterator fastMin() {
  115.         int i = getFirstNonEofTreeIndex();
  116.         if (i == -1) {
  117.             // All trees reached the end.
  118.             allTreesNamesMatchFastMinRef = true;
  119.             return trees[trees.length - 1];
  120.         }
  121.         AbstractTreeIterator minRef = trees[i];
  122.         // if i > 0 then we already know that only some trees reached the end
  123.         // (but not all), so it is impossible that ALL trees points to the
  124.         // minRef entry.
  125.         // Only if i == 0 it is still possible that all trees points to the same
  126.         // minRef entry.
  127.         allTreesNamesMatchFastMinRef = i == 0;
  128.         boolean hasConflict = false;
  129.         minRef.matches = minRef;
  130.         while (++i < trees.length) {
  131.             final AbstractTreeIterator t = trees[i];
  132.             if (t.eof()) {
  133.                 allTreesNamesMatchFastMinRef = false;
  134.                 continue;
  135.             }

  136.             final int cmp = t.pathCompare(minRef);
  137.             if (cmp < 0) {
  138.                 if (allTreesNamesMatchFastMinRef && isTree(minRef) && !isTree(t)
  139.                         && nameEqual(minRef, t)) {
  140.                     // We used to be at a tree, but now we are at a file
  141.                     // with the same name. Allow the file to match the
  142.                     // tree anyway.
  143.                     //
  144.                     t.matches = minRef;
  145.                     hasConflict = true;
  146.                 } else {
  147.                     allTreesNamesMatchFastMinRef = false;
  148.                     t.matches = t;
  149.                     minRef = t;
  150.                 }
  151.             } else if (cmp == 0) {
  152.                 // Exact name/mode match is best.
  153.                 //
  154.                 t.matches = minRef;
  155.             } else if (allTreesNamesMatchFastMinRef && isTree(t)
  156.                     && !isTree(minRef) && !isGitlink(minRef)
  157.                     && nameEqual(t, minRef)) {
  158.                 // The minimum is a file (non-tree) but the next entry
  159.                 // of this iterator is a tree whose name matches our file.
  160.                 // This is a classic D/F conflict and commonly occurs like
  161.                 // this, with no gaps in between the file and directory.
  162.                 //
  163.                 // Use the tree as the minimum instead (see combineDF).
  164.                 //

  165.                 for (int k = 0; k < i; k++) {
  166.                     final AbstractTreeIterator p = trees[k];
  167.                     if (p.matches == minRef)
  168.                         p.matches = t;
  169.                 }
  170.                 t.matches = t;
  171.                 minRef = t;
  172.                 hasConflict = true;
  173.             } else
  174.                 allTreesNamesMatchFastMinRef = false;
  175.         }

  176.         if (hasConflict && allTreesNamesMatchFastMinRef && dfConflict == null)
  177.             dfConflict = minRef;
  178.         return minRef;
  179.     }

  180.     private int getFirstNonEofTreeIndex() {
  181.         for (int i = 0; i < trees.length; i++) {
  182.             if (!trees[i].eof()) {
  183.                 return i;
  184.             }
  185.         }
  186.         return -1;
  187.     }

  188.     private static boolean nameEqual(final AbstractTreeIterator a,
  189.             final AbstractTreeIterator b) {
  190.         return a.pathCompare(b, TREE_MODE) == 0;
  191.     }

  192.     private boolean isGitlink(AbstractTreeIterator p) {
  193.         return FileMode.GITLINK.equals(p.mode);
  194.     }

  195.     private static boolean isTree(AbstractTreeIterator p) {
  196.         return FileMode.TREE.equals(p.mode);
  197.     }

  198.     private boolean skipEntry(AbstractTreeIterator minRef)
  199.             throws CorruptObjectException {
  200.         // A tree D/F may have been handled earlier. We need to
  201.         // not report this path if it has already been reported.
  202.         //
  203.         for (AbstractTreeIterator t : trees) {
  204.             if (t.matches == minRef || t.first())
  205.                 continue;

  206.             int stepsBack = 0;
  207.             for (;;) {
  208.                 stepsBack++;
  209.                 t.back(1);

  210.                 final int cmp = t.pathCompare(minRef, 0);
  211.                 if (cmp == 0) {
  212.                     // We have already seen this "$path" before. Skip it.
  213.                     //
  214.                     t.next(stepsBack);
  215.                     return true;
  216.                 } else if (cmp < 0 || t.first()) {
  217.                     // We cannot find "$path" in t; it will never appear.
  218.                     //
  219.                     t.next(stepsBack);
  220.                     break;
  221.                 }
  222.             }
  223.         }

  224.         // We have never seen the current path before.
  225.         //
  226.         return false;
  227.     }

  228.     private AbstractTreeIterator combineDF(AbstractTreeIterator minRef)
  229.             throws CorruptObjectException {
  230.         // Look for a possible D/F conflict forward in the tree(s)
  231.         // as there may be a "$path/" which matches "$path". Make
  232.         // such entries match this entry.
  233.         //
  234.         AbstractTreeIterator treeMatch = null;
  235.         for (AbstractTreeIterator t : trees) {
  236.             if (t.matches == minRef || t.eof())
  237.                 continue;

  238.             for (;;) {
  239.                 final int cmp = t.pathCompare(minRef, TREE_MODE);
  240.                 if (cmp < 0) {
  241.                     // The "$path/" may still appear later.
  242.                     //
  243.                     t.matchShift++;
  244.                     t.next(1);
  245.                     if (t.eof()) {
  246.                         t.back(t.matchShift);
  247.                         t.matchShift = 0;
  248.                         break;
  249.                     }
  250.                 } else if (cmp == 0) {
  251.                     // We have a conflict match here.
  252.                     //
  253.                     t.matches = minRef;
  254.                     treeMatch = t;
  255.                     break;
  256.                 } else {
  257.                     // A conflict match is not possible.
  258.                     //
  259.                     if (t.matchShift != 0) {
  260.                         t.back(t.matchShift);
  261.                         t.matchShift = 0;
  262.                     }
  263.                     break;
  264.                 }
  265.             }
  266.         }

  267.         // When the combineDF is called, the t.matches field stores other
  268.         // entry (i.e. tree iterator) with an equal path. However, the
  269.         // previous loop moves each iterator independently. As a result,
  270.         // iterators which have had equals path at the start of the
  271.         // method can have different paths at this point.
  272.         // Reevaluate existing matches.
  273.         // The NameConflictTreeWalkTest.testDF_specialFileNames test
  274.         // cover this situation.
  275.         for (AbstractTreeIterator t : trees) {
  276.             // The previous loop doesn't touch tree iterator if it matches
  277.             // minRef. Skip it here
  278.             if (t.eof() || t.matches == null || t.matches == minRef)
  279.                 continue;
  280.             // The t.pathCompare takes into account the entry type (file
  281.             // or directory) and returns non-zero value if names match
  282.             // but entry type don't match.
  283.             // We want to keep such matches (file/directory conflict),
  284.             // so reset matches only if names are not equal.
  285.             if (!nameEqual(t, t.matches))
  286.                 t.matches = null;
  287.         }

  288.         if (treeMatch != null) {
  289.             // If we do have a conflict use one of the directory
  290.             // matching iterators instead of the file iterator.
  291.             // This way isSubtree is true and isRecursive works.
  292.             //
  293.             for (AbstractTreeIterator t : trees)
  294.                 if (t.matches == minRef)
  295.                     t.matches = treeMatch;

  296.             if (dfConflict == null && !isGitlink(minRef)) {
  297.                 dfConflict = treeMatch;
  298.             }

  299.             return treeMatch;
  300.         }

  301.         return minRef;
  302.     }

  303.     @Override
  304.     void popEntriesEqual() throws CorruptObjectException {
  305.         final AbstractTreeIterator ch = currentHead;
  306.         for (AbstractTreeIterator t : trees) {
  307.             if (t.matches == ch) {
  308.                 if (t.matchShift == 0)
  309.                     t.next(1);
  310.                 else {
  311.                     t.back(t.matchShift);
  312.                     t.matchShift = 0;
  313.                 }
  314.                 t.matches = null;
  315.             }
  316.         }

  317.         if (ch == dfConflict)
  318.             dfConflict = null;
  319.     }

  320.     @Override
  321.     void skipEntriesEqual() throws CorruptObjectException {
  322.         final AbstractTreeIterator ch = currentHead;
  323.         for (AbstractTreeIterator t : trees) {
  324.             if (t.matches == ch) {
  325.                 if (t.matchShift == 0)
  326.                     t.skip();
  327.                 else {
  328.                     t.back(t.matchShift);
  329.                     t.matchShift = 0;
  330.                 }
  331.                 t.matches = null;
  332.             }
  333.         }

  334.         if (ch == dfConflict)
  335.             dfConflict = null;
  336.     }

  337.     @Override
  338.     void stopWalk() throws IOException {
  339.         if (!needsStopWalk()) {
  340.             return;
  341.         }

  342.         // Name conflicts make aborting early difficult. Multiple paths may
  343.         // exist between the file and directory versions of a name. To ensure
  344.         // the directory version is skipped over (as it was previously visited
  345.         // during the file version step) requires popping up the stack and
  346.         // finishing out each subtree that the walker dove into. Siblings in
  347.         // parents do not need to be recursed into, bounding the cost.
  348.         for (;;) {
  349.             AbstractTreeIterator t = min();
  350.             if (t.eof()) {
  351.                 if (depth > 0) {
  352.                     exitSubtree();
  353.                     popEntriesEqual();
  354.                     continue;
  355.                 }
  356.                 return;
  357.             }
  358.             currentHead = t;
  359.             skipEntriesEqual();
  360.         }
  361.     }

  362.     private boolean needsStopWalk() {
  363.         for (AbstractTreeIterator t : trees) {
  364.             if (t.needsStopWalk()) {
  365.                 return true;
  366.             }
  367.         }
  368.         return false;
  369.     }

  370.     /**
  371.      * True if the current entry is covered by a directory/file conflict.
  372.      *
  373.      * This means that for some prefix of the current entry's path, this walk
  374.      * has detected a directory/file conflict. Also true if the current entry
  375.      * itself is a directory/file conflict.
  376.      *
  377.      * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
  378.      * true then you know that either for path foo or for path foo/bar files and
  379.      * folders were detected.
  380.      *
  381.      * @return <code>true</code> if the current entry is covered by a
  382.      *         directory/file conflict, <code>false</code> otherwise
  383.      */
  384.     public boolean isDirectoryFileConflict() {
  385.         return dfConflict != null;
  386.     }
  387. }