Add grok-bundle command

We use grokmirror to generate clone.bundle files for use with Android's
"repo" command. Make this an official part of grokmirror.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
diff --git a/grokmirror/bundle.py b/grokmirror/bundle.py
new file mode 100644
index 0000000..b72f3c0
--- /dev/null
+++ b/grokmirror/bundle.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2013-2020 by The Linux Foundation and contributors
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import os
+import logging
+import fnmatch
+import grokmirror
+
+from pathlib import Path
+
+# Default module-level logger; grok_bundle() rebinds it to the one from grokmirror.init_logger().
+logger = logging.getLogger(__name__)
+
+
+def get_repo_size(fullpath):
+    """Return the 'size' + 'size-pack' total for the repo at fullpath, alternates included."""
+    obj_info = grokmirror.get_repo_obj_info(fullpath)
+    # Objects borrowed through an alternate count too, so start from the parent repo's total
+    if 'alternate' in obj_info:
+        total = get_repo_size(grokmirror.get_altrepo(fullpath))
+    else:
+        total = 0
+    total += int(obj_info['size']) + int(obj_info['size-pack'])
+    logger.debug('%s size: %s', fullpath, total)
+    return total
+
+
+def generate_bundles(config, outdir, gitargs, revlistargs, maxsize, include):
+    """Create or refresh clone.bundle files under outdir for manifest repos matching include.
+
+    gitargs/revlistargs: whitespace-separated strings or None; include: one glob or a list of globs.
+    """
+    # uses advisory lock, so it's safe even if we die unexpectedly
+    manifest = grokmirror.read_manifest(config['core'].get('manifest'))
+    toplevel = os.path.realpath(config['core'].get('toplevel'))
+    # Normalize to lists so that empty/None args concatenate cleanly further down
+    gitargs = gitargs.split() if gitargs else []
+    revlistargs = revlistargs.split() if revlistargs else []
+    # The argparse default is a bare string; do not iterate it character by character
+    include = [include] if isinstance(include, str) else include
+
+    for repo in manifest:
+        logger.debug('Checking %s', repo)
+        # Does it match our globbing pattern (with or without the leading slash)?
+        if not any(fnmatch.fnmatch(repo, pat) or fnmatch.fnmatch(repo, pat.lstrip('/')) for pat in include):
+            logger.debug('%s does not match include list, skipping', repo)
+            continue
+
+        repo = repo.lstrip('/')
+        fullpath = os.path.join(toplevel, repo)
+        # Strip only a trailing ".git"; str.replace would also mangle e.g. "foo.github.io.git"
+        bundledir = os.path.join(outdir, repo[:-4] if repo.endswith('.git') else repo)
+        Path(bundledir).mkdir(parents=True, exist_ok=True)
+
+        repofpr = grokmirror.get_repo_fingerprint(toplevel, repo)
+        logger.debug('%s fingerprint is %s', repo, repofpr)
+
+        # Do we have a bundle file already?
+        bfile = os.path.join(bundledir, 'clone.bundle')
+        bfprfile = os.path.join(bundledir, '.fingerprint')
+        logger.debug('Looking for %s', bfile)
+        if os.path.exists(bfile):
+            logger.debug('Found existing bundle in %s', bfile)
+            if os.path.exists(bfprfile):
+                with open(bfprfile) as fh:
+                    bfpr = fh.read().strip()
+                logger.debug('Read bundle fingerprint from %s: %s', bfprfile, bfpr)
+                if bfpr == repofpr:
+                    logger.info('  skipped: %s (unchanged)', repo)
+                    continue
+
+        logger.debug('checking size of %s', repo)
+        total_size = get_repo_size(fullpath)/1024/1024
+        if total_size > maxsize:
+            logger.info('  skipped: %s (%s > %s)', repo, total_size, maxsize)
+            continue
+
+        fullargs = gitargs + ['bundle', 'create', bfile] + revlistargs
+        logger.debug('Full git args: %s', fullargs)
+        logger.info(' generate: %s', bfile)
+        ecode, out, err = grokmirror.run_git_command(fullpath, fullargs)
+        if ecode != 0:
+            # Do not record the fingerprint, so the next run retries this repo
+            logger.warning('   failed: %s (git exited with %s): %s', bfile, ecode, err)
+            continue
+        with open(bfprfile, 'w') as fh:
+            fh.write(repofpr)
+        logger.debug('Wrote %s into %s', repofpr, bfprfile)
+
+    return 0
+
+
+def parse_args():
+    """Parse the grok-bundle command line (sys.argv) and return the argparse namespace."""
+    import argparse
+
+    # noinspection PyTypeChecker
+    parser = argparse.ArgumentParser(
+        prog='grok-bundle', description='Generate clone.bundle files for use with "repo"',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('-v', '--verbose',
+                        help='Be verbose and tell us what you are doing',
+                        action='store_true', default=False)
+    parser.add_argument('-c', '--config',
+                        help='Location of the configuration file',
+                        required=True)
+    parser.add_argument('-o', '--outdir',
+                        help='Location where to store bundle files',
+                        required=True)
+    parser.add_argument('-g', '--gitargs',
+                        help='extra args to pass to git',
+                        default='-c core.compression=9')
+    parser.add_argument('-r', '--revlistargs',
+                        help='Rev-list args to use',
+                        default='--branches HEAD')
+    parser.add_argument('-s', '--maxsize',
+                        help='Maximum size of git repositories to bundle (in GiB)',
+                        type=int, default=2)
+    # nargs='*' produces a list when -i is given; the default is the one-character string '*'
+    parser.add_argument('-i', '--include',
+                        help='List repositories to bundle (accepts shell globbing)',
+                        nargs='*', default='*')
+
+    return parser.parse_args()
+
+
+def grok_bundle(cfgfile, outdir, gitargs, revlistargs, maxsize, include, verbose=False):
+    """Load cfgfile, set up the 'bundle' logger and return the result of generate_bundles()."""
+    global logger
+
+    config = grokmirror.load_config_file(cfgfile)
+    core = config['core']
+
+    # Anything other than an explicit "debug" loglevel falls back to INFO
+    wantdebug = core.get('loglevel', 'info') == 'debug'
+    loglevel = logging.DEBUG if wantdebug else logging.INFO
+    # Rebind the module-level logger so the other functions in this module log through it
+    logger = grokmirror.init_logger('bundle', core.get('log', None), loglevel, verbose)
+
+    return generate_bundles(config, outdir, gitargs, revlistargs, maxsize, include)
+
+
+def command():
+    """Console entry point: parse the arguments, run grok_bundle() and exit with its result."""
+    opts = parse_args()
+    retval = grok_bundle(opts.config, opts.outdir, opts.gitargs, opts.revlistargs,
+                         opts.maxsize, opts.include, verbose=opts.verbose)
+
+    sys.exit(retval)
+
+
+if __name__ == '__main__':  # also runnable directly, not only via the grok-bundle entry point
+    command()
diff --git a/man/grok-bundle.1 b/man/grok-bundle.1
new file mode 100644
index 0000000..6d99aa0
--- /dev/null
+++ b/man/grok-bundle.1
@@ -0,0 +1,106 @@
+.\" Man page generated from reStructuredText.
+.
+.TH GROK-BUNDLE 1 "2020-09-04" "2.0.0" ""
+.SH NAME
+GROK-BUNDLE \- Create clone.bundle files for use with "repo"
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.SH SYNOPSIS
+.INDENT 0.0
+.INDENT 3.5
+grok\-bundle [options] \-c grokmirror.conf \-o path
+.UNINDENT
+.UNINDENT
+.SH DESCRIPTION
+.sp
+Android\(aqs "repo" tool will check for the presence of clone.bundle files
+before performing a fresh git clone. This is done in order to offload
+most of the git traffic to a CDN and reduce the load on git servers
+themselves.
+.sp
+This command will generate clone.bundle files in a hierarchy expected by
+repo. You can then sync the output directory to a CDN service.
+.SH OPTIONS
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \-h\fP,\fB  \-\-help
+show this help message and exit
+.TP
+.B \-v\fP,\fB  \-\-verbose
+Be verbose and tell us what you are doing (default: False)
+.TP
+.BI \-c \ CONFIG\fP,\fB \ \-\-config \ CONFIG
+Location of the configuration file
+.TP
+.BI \-o \ OUTDIR\fP,\fB \ \-\-outdir \ OUTDIR
+Location where to store bundle files
+.TP
+.BI \-g \ GITARGS\fP,\fB \ \-\-gitargs \ GITARGS
+extra args to pass to git (default: \-c core.compression=9)
+.TP
+.BI \-r \ REVLISTARGS\fP,\fB \ \-\-revlistargs \ REVLISTARGS
+Rev\-list args to use (default: \-\-branches HEAD)
+.TP
+.BI \-s \ MAXSIZE\fP,\fB \ \-\-maxsize \ MAXSIZE
+Maximum size of git repositories to bundle (in GiB) (default: 2)
+.TP
+.BI \-i\fP,\fB  \-\-include \ INCLUDE
+List repositories to bundle (accepts shell globbing) (default: *)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH EXAMPLES
+.INDENT 0.0
+.INDENT 3.5
+grok\-bundle \-c grokmirror.conf \-o /var/www/bundles \-i /pub/scm/linux/kernel/git/torvalds/linux.git /pub/scm/linux/kernel/git/stable/linux.git /pub/scm/linux/kernel/git/next/linux\-next.git
+.UNINDENT
+.UNINDENT
+.SH SEE ALSO
+.INDENT 0.0
+.IP \(bu 2
+grok\-pull(1)
+.IP \(bu 2
+grok\-manifest(1)
+.IP \(bu 2
+grok\-fsck(1)
+.IP \(bu 2
+git(1)
+.UNINDENT
+.SH SUPPORT
+.sp
+Email \fI\%tools@linux.kernel.org\fP\&.
+.SH AUTHOR
+mricon@kernel.org
+
+License: GPLv3+
+.SH COPYRIGHT
+The Linux Foundation and contributors
+.\" Generated by docutils manpage writer.
+.
diff --git a/man/grok-bundle.1.rst b/man/grok-bundle.1.rst
new file mode 100644
index 0000000..e71d206
--- /dev/null
+++ b/man/grok-bundle.1.rst
@@ -0,0 +1,60 @@
+GROK-BUNDLE
+===========
+-------------------------------------------------
+Create clone.bundle files for use with "repo"
+-------------------------------------------------
+
+:Author:    mricon@kernel.org
+:Date:      2020-09-04
+:Copyright: The Linux Foundation and contributors
+:License:   GPLv3+
+:Version:   2.0.0
+:Manual section: 1
+
+SYNOPSIS
+--------
+    grok-bundle [options] -c grokmirror.conf -o path
+
+DESCRIPTION
+-----------
+Android's "repo" tool will check for the presence of clone.bundle files
+before performing a fresh git clone. This is done in order to offload
+most of the git traffic to a CDN and reduce the load on git servers
+themselves.
+
+This command will generate clone.bundle files in a hierarchy expected by
+repo. You can then sync the output directory to a CDN service.
+
+OPTIONS
+-------
+
+  -h, --help            show this help message and exit
+  -v, --verbose         Be verbose and tell us what you are doing (default: False)
+  -c CONFIG, --config CONFIG
+                        Location of the configuration file
+  -o OUTDIR, --outdir OUTDIR
+                        Location where to store bundle files
+  -g GITARGS, --gitargs GITARGS
+                        extra args to pass to git (default: -c core.compression=9)
+  -r REVLISTARGS, --revlistargs REVLISTARGS
+                        Rev-list args to use (default: --branches HEAD)
+  -s MAXSIZE, --maxsize MAXSIZE
+                        Maximum size of git repositories to bundle (in GiB) (default: 2)
+  -i, --include INCLUDE
+                        List repositories to bundle (accepts shell globbing) (default: \*)
+
+EXAMPLES
+--------
+
+    grok-bundle -c grokmirror.conf -o /var/www/bundles -i /pub/scm/linux/kernel/git/torvalds/linux.git /pub/scm/linux/kernel/git/stable/linux.git /pub/scm/linux/kernel/git/next/linux-next.git
+
+SEE ALSO
+--------
+* grok-pull(1)
+* grok-manifest(1)
+* grok-fsck(1)
+* git(1)
+
+SUPPORT
+-------
+Email tools@linux.kernel.org.
diff --git a/setup.py b/setup.py
index 6584686..f72154c 100644
--- a/setup.py
+++ b/setup.py
@@ -63,6 +63,7 @@
             "grok-pull=grokmirror.pull:command",
             "grok-fsck=grokmirror.fsck:command",
             "grok-manifest=grokmirror.manifest:command",
+            "grok-bundle=grokmirror.bundle:command",
         ]
     }
 )