Be more paranoid pruning/purging repos

We don't want to rely solely on the manifest when pruning or purging
repos that are acting as alternates to others. Instead, walk all repos
under toplevel and make sure none of them lists the repo in its
objects/info/alternates file.

This may need optimizing for very large collections of repositories,
since each check can walk the entire toplevel tree.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2ace3fd..a303b30 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -13,5 +13,7 @@
   - no longer locking repos when running repack/prune/fsck, because
     these operations are safe as long as they are done by git itself
 
-- fix grok-pull so it no longer purges repos known to be providing
+- fix grok-pull so it no longer purges repos that are providing
   alternates to others
+- fix grok-fsck so it's more paranoid when pruning repos providing
+  alternates to others (checks all repos on disk, not just manifest)
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index 18dcded..f589010 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -170,24 +170,32 @@
     return fingerprint
 
 
-def find_all_alt_repos(refrepo, manifest):
-    """
-    :param toplevel: toplevel of the repository location
-    :param refrepo: path of the repository
-    :param manifest: full manifest of repositories we track
-    :return: List of repositories using gitdir in its alternates
-    """
-    logger.debug('Finding all repositories using %s as its alternates',
-                 refrepo)
-    refrepo = refrepo.lstrip('/')
-    repolist = []
-    for gitdir in manifest.keys():
-        if gitdir.lstrip('/') == refrepo:
+def is_alt_repo(toplevel, refrepo):
+    # We recurse through toplevel and return true if we find at least
+    # one repo that lists us in its objects/info/alternates.
+    looking_for = os.path.join(toplevel, refrepo.strip('/'), 'objects').encode('utf-8')
+    import mmap
+    for root, dirs, files in os.walk(toplevel, topdown=True):
+        if not len(dirs):
             continue
-        if 'reference' in manifest[gitdir].keys() and manifest[gitdir]['reference'] is not None:
-            if manifest[gitdir]['reference'].lstrip('/') == refrepo:
-                repolist.append(gitdir)
-    return repolist
+
+        torm = []
+        for name in dirs:
+            # Is there an objects/info/alternates in this dir?
+            altfile = os.path.join(root, name, 'objects', 'info', 'alternates')
+            if os.path.exists(altfile):
+                with open(altfile, 'rb') as altfh:
+                    if looking_for in altfh.read():
+                        logger.debug('Found refrepo %s in  %s', refrepo,
+                                     altfile)
+                        return True
+                torm.append(name)
+
+        for name in torm:
+            # don't recurse into the found *.git dirs
+            dirs.remove(name)
+
+    return False
 
 
 def find_all_gitdirs(toplevel, ignore=None):
diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py
index 23f3a61..297791a 100755
--- a/grokmirror/fsck.py
+++ b/grokmirror/fsck.py
@@ -38,9 +38,7 @@
 
     # Are any other repos using us in their objects/info/alternates?
     gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
-    repolist = grokmirror.find_all_alt_repos(gitdir, manifest)
-
-    if len(repolist):
+    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
         logger.info('  prune : skipped, is alternate to other repos')
         return prune_ok
 
diff --git a/grokmirror/pull.py b/grokmirror/pull.py
index a80d724..6a014c3 100755
--- a/grokmirror/pull.py
+++ b/grokmirror/pull.py
@@ -998,8 +998,7 @@
                     else:
                         # is anything using us for alternates?
                         gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
-                        repolist = grokmirror.find_all_alt_repos(gitdir, culled)
-                        if repolist:
+                        if grokmirror.is_alt_repo(toplevel, gitdir):
                             logger.info('Not purging %s because it is used by '
                                         'other repos via alternates', founddir)
                         else: