Add some more features to piper

- Make it a fully supported command called grok-pi-piper
- Add support for reshallowing repos after each piper run,
  saving tons of space
- Sprinkle expanduser in a few places to better support user-initiated
  operations

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5b9c69f..c5e1eee 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,7 +1,7 @@
 v2.0.2 (2020-10-06)
 - Provide pi-piper utility for piping new messages from public-inbox
   repositories. It can be specified as post_update_hook:
-  post_update_hook = /usr/bin/pi-piper -c ~/pi-piper.conf
+  post_update_hook = /usr/bin/grok-pi-piper -c ~/.config/pi-piper.conf
 - Add -r option to grok-manifest to ignore specific refs when calculating
   repository fingerprint. This is mostly useful for mirroring from gerrit.
 
diff --git a/contrib/python-grokmirror.spec b/contrib/python-grokmirror.spec
index 3c96836..9d250bb 100644
--- a/contrib/python-grokmirror.spec
+++ b/contrib/python-grokmirror.spec
@@ -59,7 +59,6 @@
 %{__install} -m 0644 contrib/*.timer %{buildroot}/%{_unitdir}/
 %{__install} -m 0644 contrib/logrotate %{buildroot}/%{_sysconfdir}/logrotate.d/grokmirror
 %{__install} -m 0644 grokmirror.conf %{buildroot}/%{_sysconfdir}/%{srcname}/grokmirror.conf.example
-%{__install} -m 0755 contrib/pi-piper.py %{buildroot}/%{_bindir}/pi-piper
 
 echo "d /run/%{srcname} 0755 %{username} %{groupname}" > %{buildroot}/%{_tmpfilesdir}/%{srcname}.conf
 
@@ -72,7 +71,7 @@
 
 %files -n python3-%{srcname}
 %license LICENSE.txt
-%doc README.rst grokmirror.conf contrib/pi-piper.conf
+%doc README.rst grokmirror.conf pi-piper.conf
 %dir %attr(0750, %{username}, %{groupname}) %{userhome}
 %dir %attr(0755, %{username}, %{groupname}) %{_localstatedir}/log/%{srcname}/
 %dir %attr(0755, %{username}, %{groupname}) /run/%{srcname}/
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index f888593..b37408f 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -927,16 +927,23 @@
         sys.stderr.write('       Perhaps this is a grokmirror-1.x config file?\n')
         sys.exit(1)
 
-    toplevel = os.path.realpath(config['core'].get('toplevel'))
+    toplevel = os.path.realpath(os.path.expanduser(config['core'].get('toplevel')))
     if not os.access(toplevel, os.W_OK):
         logger.critical('Toplevel %s does not exist or is not writable', toplevel)
         sys.exit(1)
+    # Just in case we did expanduser
+    config['core']['toplevel'] = toplevel
 
     obstdir = config['core'].get('objstore', None)
     if obstdir is None:
         obstdir = os.path.join(toplevel, 'objstore')
         config['core']['objstore'] = obstdir
 
+    # Handle some other defaults
+    manifile = config['core'].get('manifest')
+    if not manifile:
+        config['core']['manifest'] = os.path.join(toplevel, 'manifest.js.gz')
+
     fstat = os.stat(cfgfile)
     # stick last config file modification date into the config object,
     # so we can catch config file updates
@@ -1004,7 +1011,7 @@
     logger.setLevel(logging.DEBUG)
 
     if logfile:
-        ch = logging.handlers.WatchedFileHandler(logfile)
+        ch = logging.handlers.WatchedFileHandler(os.path.expanduser(logfile))
         formatter = logging.Formatter(subcommand + '[%(process)d] %(asctime)s - %(levelname)s - %(message)s')
         ch.setFormatter(formatter)
         ch.setLevel(loglevel)
diff --git a/contrib/pi-piper.py b/grokmirror/pi-piper.py
old mode 100644
new mode 100755
similarity index 70%
rename from contrib/pi-piper.py
rename to grokmirror/pi-piper.py
index 59ab27f..5d5bbfb
--- a/contrib/pi-piper.py
+++ b/grokmirror/pi-piper.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
-# This is a ready-made post_update_hook for mirroring public-inbox repositories.
-# updated via grokmirror to arbitrary commands.
+# This is a ready-made post_update_hook script for piping messages from
+# mirrored public-inbox repositories to arbitrary commands (e.g. procmail).
 #
 
 __author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
@@ -36,20 +36,9 @@
     if pipelast:
         rev_range = '-n %d' % pipelast
     else:
-        try:
-            with open(statf, 'r') as fh:
-                latest = fh.read().strip()
-                rev_range = f'{latest}..'
-        except FileNotFoundError:
-            logger.info('Initial run for %s', fullpath)
-            args = ['rev-list', '-n', '1', 'master']
-            ecode, out, err = grokmirror.run_git_command(fullpath, args)
-            if ecode > 0:
-                raise KeyError('Could not list revs in %s' % fullpath)
-            # Just write latest into the tracking file and return nothing
-            with open(statf, 'w') as fh:
-                fh.write(out.strip())
-                return list()
+        with open(statf, 'r') as fh:
+            latest = fh.read().strip()
+            rev_range = f'{latest}..'
 
     args = ['rev-list', '--pretty=oneline', '--reverse', rev_range, 'master']
     ecode, out, err = grokmirror.run_git_command(fullpath, args)
@@ -66,7 +55,33 @@
     return newrevs
 
 
-def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None):
+def reshallow(repo: str, commit_id: str) -> int:
+    """Write commit_id to repo's 'shallow' file, run gc --prune=now, return its exit code."""
+    with open(os.path.join(repo, 'shallow'), 'w') as shallow_fh:
+        shallow_fh.write(commit_id + '\n')
+    logger.info('   prune: %s ', repo)
+    gc_ecode, _gc_out, _gc_err = grokmirror.run_git_command(repo, ['gc', '--prune=now'])
+    return gc_ecode
+
+
+def init_piper_tracking(repo: str, shallow: bool) -> bool:
+    """Save master's latest commit id to pi-piper.latest; return True on success."""
+    logger.info('Initial setup for %s', repo)
+    args = ['rev-list', '-n', '1', 'master']
+    ecode, out, err = grokmirror.run_git_command(repo, args)
+    if ecode > 0 or not out:
+        logger.info('Could not list revs in %s', repo)
+        return False
+    latest = out.strip()
+    with open(os.path.join(repo, 'pi-piper.latest'), 'w') as fh:
+        fh.write(latest)
+    if shallow:
+        reshallow(repo, latest)
+    return True  # callers treat a falsy result as a failed setup
+
+
+def run_pi_repo(repo: str, pipedef: str, dryrun: bool = False, shallow: bool = False,
+                pipelast: Optional[int] = None) -> None:
     logger.info('Checking %s', repo)
     sp = shlex.shlex(pipedef, posix=True)
     sp.whitespace_split = True
@@ -76,6 +91,14 @@
         sys.exit(1)
 
     statf = os.path.join(repo, 'pi-piper.latest')
+    if not os.path.exists(statf):
+        if dryrun:
+            logger.info('Would have set up piper for %s [DRYRUN]', repo)
+            return
+        if not init_piper_tracking(repo, shallow):
+            logger.critical('Unable to set up piper for %s', repo)
+        return
+
     try:
         revlist = git_get_new_revs(repo, pipelast=pipelast)
     except KeyError:
@@ -88,8 +111,12 @@
         #      then going through history to find the new commit-id of that
         #      message. Unless, of course, that's the exact message that got
         #      deleted in the first place. :/
+        #      This also makes it hard with shallow repos, since we'd have
+        #      to unshallow them first in order to find that message.
         logger.critical('Assuming the repository got rebased, dropping all history.')
         os.unlink(statf)
+        if not dryrun:
+            init_piper_tracking(repo, shallow)
         revlist = git_get_new_revs(repo)
 
     if not revlist:
@@ -119,18 +146,20 @@
         with open(statf, 'w') as fh:
             fh.write(latest_good)
             logger.info('Wrote %s', statf)
+        if ecode == 0 and shallow:
+            reshallow(repo, latest_good)
 
     sys.exit(ecode)
 
 
-def main():
+def command():
     import argparse
     from configparser import ConfigParser, ExtendedInterpolation
 
     global logger
 
     # noinspection PyTypeChecker
-    op = argparse.ArgumentParser(prog='pi-piper',
+    op = argparse.ArgumentParser(prog='grok-pi-piper',
                                  description='Pipe new messages from public-inbox repositories to arbitrary commands',
                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     op.add_argument('-v', '--verbose', action='store_true',
@@ -149,11 +178,12 @@
 
     opts = op.parse_args()
 
-    if not os.path.exists(opts.config):
-        sys.stderr.write('ERORR: File does not exist: %s\n' % opts.config)
+    cfgfile = os.path.expanduser(opts.config)
+    if not cfgfile:
+        sys.stderr.write('ERORR: File does not exist: %s\n' % cfgfile)
         sys.exit(1)
     config = ConfigParser(interpolation=ExtendedInterpolation())
-    config.read(os.path.expanduser(opts.config))
+    config.read(os.path.expanduser(cfgfile))
 
     # Find out the section that we want from the config file
     section = 'DEFAULT'
@@ -166,16 +196,18 @@
         # Quick exit
         sys.exit(0)
 
-    logfile = os.path.expanduser(config[section].get('logfile'))
+    logfile = config[section].get('log')
     if config[section].get('loglevel') == 'debug':
         loglevel = logging.DEBUG
     else:
         loglevel = logging.INFO
 
+    shallow = config[section].getboolean('shallow', False) # noqa
+
     logger = grokmirror.init_logger('pull', logfile, loglevel, opts.verbose)
 
-    run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, pipelast=opts.pipelast)
+    run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, shallow=shallow, pipelast=opts.pipelast)
 
 
 if __name__ == '__main__':
-    main()
+    command()
diff --git a/grokmirror/pull.py b/grokmirror/pull.py
index ac7672f..1b2b7a0 100755
--- a/grokmirror/pull.py
+++ b/grokmirror/pull.py
@@ -511,12 +511,12 @@
         return
 
     for hookscript in hookscripts.split('\n'):
-        hookscript = hookscript.strip()
+        hookscript = os.path.expanduser(hookscript.strip())
         sp = shlex.shlex(hookscript, posix=True)
         sp.whitespace_split = True
         args = list(sp)
 
-        logger.info('     hook: %s', args[0])
+        logger.info('     hook: %s', ' '.join(args))
         if not os.access(args[0], os.X_OK):
             logger.warning('post_update_hook %s is not executable', hookscript)
             continue
@@ -1063,7 +1063,6 @@
     pull_threads = config['pull'].getint('pull_threads', 0)
     if pull_threads < 1:
         # take half of available CPUs by default
-        logger.info('pull_threads is not set, consider setting it')
         pull_threads = int(mp.cpu_count() / 2)
 
     busy = set()
diff --git a/man/grok-pi-piper.1 b/man/grok-pi-piper.1
new file mode 100644
index 0000000..d1f3da6
--- /dev/null
+++ b/man/grok-pi-piper.1
@@ -0,0 +1,129 @@
+.\" Man page generated from reStructuredText.
+.
+.TH GROK-PI-PIPER 1 "2020-10-07" "2.0.2" ""
+.SH NAME
+GROK-PI-PIPER \- Hook script for piping new messages from public-inbox repos
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.SH SYNOPSIS
+.INDENT 0.0
+.INDENT 3.5
+grok\-pi\-piper [\-h] [\-v] [\-d] \-c CONFIG [\-l PIPELAST] [\-\-version] repo
+.UNINDENT
+.UNINDENT
+.SH DESCRIPTION
+.sp
+This is a ready\-made hook script that can be called from
+pull.post_update_hook when mirroring public\-inbox repositories. It will
+pipe all newly received messages to arbitrary commands defined in the
+config file. The simplest configuration for lore.kernel.org is:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+~/.config/pi\-piper.conf
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+[DEFAULT]
+pipe = /usr/bin/procmail
+shallow = yes
+
+~/.procmailrc
+\-\-\-\-\-\-\-\-\-\-\-\-\-
+DEFAULT=$HOME/Maildir/
+
+~/.config/lore.conf
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+[core]
+toplevel = ~/.local/share/grokmirror/lore
+log = ${toplevel}/grokmirror.log
+
+[remote]
+site = https://lore.kernel.org
+manifest = https://lore.kernel.org/manifest.js.gz
+
+[pull]
+post_update_hook = ~/.local/bin/grok\-pi\-piper \-c ~/.config/pi\-piper.conf
+include = /list\-you\-want/*
+          /another\-list/*
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+It assumes that grokmirror was installed from pip. If you installed it
+via some other means, please check the path for the grok\-pi\-piper
+script.
+.sp
+Note that the initial clone may take a long time, even if you set
+shallow=yes.
+.sp
+See pi\-piper.conf for other config options.
+.SH OPTIONS
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \-h\fP,\fB  \-\-help
+show this help message and exit
+.TP
+.B \-v\fP,\fB  \-\-verbose
+Be verbose and tell us what you are doing (default: False)
+.TP
+.B \-d\fP,\fB  \-\-dry\-run
+Do a dry\-run and just show what would be done (default: False)
+.TP
+.BI \-c \ CONFIG\fP,\fB \ \-\-config \ CONFIG
+Location of the configuration file (default: None)
+.TP
+.BI \-l \ PIPELAST\fP,\fB \ \-\-pipe\-last \ PIPELAST
+Force pipe last NN messages in the list, regardless of tracking (default: None)
+.TP
+.B \-\-version
+show program\(aqs version number and exit
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH SEE ALSO
+.INDENT 0.0
+.IP \(bu 2
+grok\-pull(1)
+.IP \(bu 2
+git(1)
+.UNINDENT
+.SH SUPPORT
+.sp
+Email \fI\%tools@linux.kernel.org\fP\&.
+.SH AUTHOR
+mricon@kernel.org
+
+License: GPLv3+
+.SH COPYRIGHT
+The Linux Foundation and contributors
+.\" Generated by docutils manpage writer.
+.
diff --git a/man/grok-pi-piper.1.rst b/man/grok-pi-piper.1.rst
new file mode 100644
index 0000000..18d16bb
--- /dev/null
+++ b/man/grok-pi-piper.1.rst
@@ -0,0 +1,79 @@
+GROK-PI-PIPER
+=============
+-----------------------------------------------------------
+Hook script for piping new messages from public-inbox repos
+-----------------------------------------------------------
+
+:Author:    mricon@kernel.org
+:Date:      2020-10-07
+:Copyright: The Linux Foundation and contributors
+:License:   GPLv3+
+:Version:   2.0.2
+:Manual section: 1
+
+SYNOPSIS
+--------
+    grok-pi-piper [-h] [-v] [-d] -c CONFIG [-l PIPELAST] [--version] repo
+
+DESCRIPTION
+-----------
+This is a ready-made hook script that can be called from
+pull.post_update_hook when mirroring public-inbox repositories. It will
+pipe all newly received messages to arbitrary commands defined in the
+config file. The simplest configuration for lore.kernel.org is::
+
+    ~/.config/pi-piper.conf
+    -----------------------
+    [DEFAULT]
+    pipe = /usr/bin/procmail
+    shallow = yes
+
+    ~/.procmailrc
+    -------------
+    DEFAULT=$HOME/Maildir/
+
+    ~/.config/lore.conf
+    -------------------
+    [core]
+    toplevel = ~/.local/share/grokmirror/lore
+    log = ${toplevel}/grokmirror.log
+
+    [remote]
+    site = https://lore.kernel.org
+    manifest = https://lore.kernel.org/manifest.js.gz
+
+    [pull]
+    post_update_hook = ~/.local/bin/grok-pi-piper -c ~/.config/pi-piper.conf
+    include = /list-you-want/*
+              /another-list/*
+
+It assumes that grokmirror was installed from pip. If you installed it
+via some other means, please check the path for the grok-pi-piper
+script.
+
+Note that the initial clone may take a long time, even if you set
+shallow=yes.
+
+See pi-piper.conf for other config options.
+
+
+OPTIONS
+-------
+  -h, --help            show this help message and exit
+  -v, --verbose         Be verbose and tell us what you are doing (default: False)
+  -d, --dry-run         Do a dry-run and just show what would be done (default: False)
+  -c CONFIG, --config CONFIG
+                        Location of the configuration file (default: None)
+  -l PIPELAST, --pipe-last PIPELAST
+                        Force pipe last NN messages in the list, regardless of tracking (default: None)
+  --version             show program's version number and exit
+
+
+SEE ALSO
+--------
+* grok-pull(1)
+* git(1)
+
+SUPPORT
+-------
+Email tools@linux.kernel.org.
diff --git a/contrib/pi-piper.conf b/pi-piper.conf
similarity index 61%
rename from contrib/pi-piper.conf
rename to pi-piper.conf
index 023fd1a..9d1a4d0 100644
--- a/contrib/pi-piper.conf
+++ b/pi-piper.conf
@@ -5,10 +5,18 @@
 #     DEFAULT=$HOME/Maildir/
 # You can now read your mail with "mutt -f ~/Maildir/"
 pipe = /usr/bin/procmail
-# You can use ~/ for paths in your home dir
-logfile = ~/pi-piper.log
+# Once you've successfully piped the messages, you generally
+# don't need them any more. If you set shallow = yes, then
+# the repository will be configured as "shallow" and all successfully
+# processed messages will be pruned from the repo.
+# This will greatly reduce disk space usage, especially on large archives.
+# You can always get any number of them back, e.g. by running:
+# git fetch _grokmirror master --deepen 100
+shallow = yes
+# You can use ~/ for paths in your home dir, or omit for no log
+#log = ~/pi-piper.log
 # Can be "info" or "debug". Note, that debug will have message bodies as well.
-loglevel = info
+#loglevel = info
 
 # Overrides for any defaults. You may not need any if all you want is to pipe all mirrored
 # public-inboxes to procmail.
diff --git a/setup.py b/setup.py
index f72154c..1aabf48 100644
--- a/setup.py
+++ b/setup.py
@@ -64,6 +64,7 @@
             "grok-fsck=grokmirror.fsck:command",
             "grok-manifest=grokmirror.manifest:command",
             "grok-bundle=grokmirror.bundle:command",
+            "grok-pi-piper=grokmirror.pi-piper:command",
         ]
     }
 )