| #-*- coding: utf-8 -*- |
| # Copyright (C) 2013 by The Linux Foundation and contributors |
| # |
| # This program is free software: you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation, either version 3 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| |
| import os |
| |
| import grokmirror |
| import logging |
| |
| import time |
| import json |
| import subprocess |
| import random |
| import datetime |
| |
| from fcntl import lockf, LOCK_EX, LOCK_UN, LOCK_NB |
| |
| # default basic logger. We override it later. |
| logger = logging.getLogger(__name__) |
| |
| |
def run_git_prune(fullpath, config, manifest):
    """Run "git prune" on a repository, unless it serves as an alternate.

    :param fullpath: absolute path to the git repository
    :param config: per-section fsck config dict (uses 'prune', 'toplevel'
                   and 'ignore_errors')
    :param manifest: grokmirror manifest dict (currently unused, kept for
                     call-site compatibility)
    :returns: True if pruning succeeded or was skipped, False if git
              emitted stderr lines not covered by config['ignore_errors']
    """
    if config.get('prune') != 'yes':
        return True

    # Are any other repos using us in their objects/info/alternates?
    # Pruning a repo that others borrow objects from can corrupt them.
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        logger.info(' prune : skipped, is alternate to other repos')
        return True

    env = {'GIT_DIR': fullpath}
    args = ['/usr/bin/git', 'prune']
    logger.info(' prune : pruning')

    logger.debug('Running: GIT_DIR=%s %s', env['GIT_DIR'], ' '.join(args))

    (output, error) = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        env=env).communicate()

    error = error.decode().strip()

    prune_ok = True
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            for estring in config['ignore_errors']:
                if estring in line:
                    debug.append(line)
                    break
            else:
                # No ignore pattern matched -- treat as critical
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Pruning %s returned critical errors:', fullpath)
            prune_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)

    return prune_ok
| |
| |
def run_git_repack(fullpath, config, full_repack=False):
    """Repack a repository and then repack its refs.

    Runs "git repack" with either the regular or the full-repack flag set,
    followed by "git pack-refs --all".

    :param fullpath: absolute path to the git repository
    :param config: per-section fsck config dict (uses 'repack',
                   'repack_flags', 'full_repack_flags', 'ignore_errors')
    :param full_repack: use config['full_repack_flags'] when available
    :returns: False if we hit any errors on the way, True otherwise
    """
    if config.get('repack') != 'yes':
        return True

    def _filter_stderr(error, critmsg):
        # Shared stderr triage for both git invocations below: lines
        # matching config['ignore_errors'] go to debug; anything else is
        # logged as critical. Returns True if no critical lines were seen.
        debug = []
        warn = []
        for line in error.split('\n'):
            for estring in config['ignore_errors']:
                if estring in line:
                    debug.append(line)
                    break
            else:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical(critmsg, fullpath)
            for entry in warn:
                logger.critical("\t%s", entry)
            return False
        return True

    repack_flags = '-A -d -l -q'

    if full_repack and 'full_repack_flags' in config:
        repack_flags = config['full_repack_flags']
        logger.debug('Time to do a full repack of %s', fullpath)

    elif 'repack_flags' in config:
        repack_flags = config['repack_flags']

    flags = repack_flags.split()

    env = {'GIT_DIR': fullpath}
    args = ['/usr/bin/git', 'repack'] + flags
    logger.info(' repack : repacking with %s', repack_flags)

    logger.debug('Running: GIT_DIR=%s %s', env['GIT_DIR'], ' '.join(args))

    (output, error) = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        env=env).communicate()

    error = error.decode().strip()

    # With newer versions of git, repack may return warnings that are safe to
    # ignore, so weed out things we aren't interested in seeing
    if error and not _filter_stderr(
            error, 'Repacking %s returned critical errors:'):
        # No need to repack refs if repo is broken
        return False

    # repacking refs requires a separate command, so run it now
    args = ['/usr/bin/git', 'pack-refs', '--all']
    logger.info(' repack : repacking refs')

    logger.debug('Running: GIT_DIR=%s %s', env['GIT_DIR'], ' '.join(args))

    (output, error) = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        env=env).communicate()

    error = error.decode().strip()

    # pack-refs shouldn't return anything, but use the same ignore_errors
    # triage to weed out any future potential benign warnings
    if error and not _filter_stderr(
            error, 'Repacking refs %s returned critical errors:'):
        return False

    return True
| |
def run_git_fsck(fullpath, config, conn_only=False):
    """Run "git fsck --no-dangling" on a repository and log any problems.

    :param fullpath: absolute path to the git repository
    :param config: per-section fsck config dict (uses 'ignore_errors')
    :param conn_only: pass --connectivity-only for a much faster check
                      that only verifies object graph connectivity
    :returns: True if no critical errors were found, False otherwise.
              (Previously returned None; the caller ignores the return
              value, so this is backward-compatible and makes the function
              consistent with run_git_prune/run_git_repack.)
    """
    env = {'GIT_DIR': fullpath}
    args = ['/usr/bin/git', 'fsck', '--no-dangling']
    if conn_only:
        args.append('--connectivity-only')
        logger.info(' fsck : running with --connectivity-only')
    else:
        logger.info(' fsck : running full checks')

    logger.debug('Running: GIT_DIR=%s %s', env['GIT_DIR'], ' '.join(args))

    (output, error) = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        env=env).communicate()

    # fsck reports problems on both stdout and stderr, so look at both
    error = (error.decode() + output.decode()).strip()

    fsck_ok = True
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            for estring in config['ignore_errors']:
                if estring in line:
                    debug.append(line)
                    break
            else:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            fsck_ok = False
            logger.critical('%s has critical errors:', fullpath)
            for entry in warn:
                logger.critical("\t%s", entry)

    return fsck_ok
| |
| |
def fsck_mirror(name, config, verbose=False, force=False, conn_only=False,
                repack_all_quick=False, repack_all_full=False):
    """Check, repack and prune all repositories that are due for a check.

    Reads the status file to find out which repositories are due, runs
    git repack/prune/fsck on them as configured, and schedules the next
    check date for each repository. Only one instance may run at a time
    (guarded by an fcntl lock on config['lock']).

    :param name: config section name, used for the logger
    :param config: per-section settings dict ('lock', 'manifest',
                   'statusfile', 'toplevel', 'frequency', etc.)
    :param verbose: also log INFO messages to the console
    :param force: check every repository now, regardless of schedule
    :param conn_only: fsck with --connectivity-only (implies force)
    :param repack_all_quick: quick-repack all repos, skip fsck (implies force)
    :param repack_all_full: full-repack all repos, skip fsck (implies force)
    :returns: 0 if another instance holds the lock, 1 if the status file
              could not be read/parsed, None on normal completion
    """
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config:
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config:
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    # These modes only make sense when applied to every repository
    if conn_only or repack_all_quick or repack_all_full:
        force = True

    logger.info('Running grok-fsck for [%s]', name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s', config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s', config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s', config['statusfile'])
        # Format of the status file:
        # {
        #   '/full/path/to/repository': {
        #     'lastcheck': 'YYYY-MM-DD' or 'never',
        #     'nextcheck': 'YYYY-MM-DD',
        #     'lastrepack': 'YYYY-MM-DD',
        #     'fingerprint': 'sha-1',
        #     's_elapsed': seconds,
        #     'quick_repack_count': times,
        #   },
        #   ...
        # }
        try:
            # Context manager closes the handle on all paths (the previous
            # code left it open after a successful read).
            with open(config['statusfile'], 'rb') as stfh:
                status = json.loads(stfh.read().decode('utf-8'))
        except (IOError, ValueError):
            # json parse failures raise ValueError (JSONDecodeError is a
            # subclass); previously this was a bare "except:" that would
            # also have swallowed KeyboardInterrupt and SystemExit.
            logger.critical('Failed to parse %s', config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    frequency = int(config['frequency'])

    today = datetime.datetime.today()
    todayiso = today.strftime('%F')

    # Go through the manifest and compare with status
    for gitdir in manifest.keys():
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            if not force:
                # Randomize next check between now and frequency, so we
                # don't end up fscking all freshly-added repos on one day
                delay = random.randint(0, frequency)
                nextdate = today + datetime.timedelta(days=delay)
                nextcheck = nextdate.strftime('%F')
            else:
                nextcheck = todayiso

            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('%s:', fullpath)
            logger.info(' added : next check on %s', nextcheck)

    total_checked = 0
    total_elapsed = 0

    # Go through status and queue checks for all the dirs that are due today
    # (unless --force, which is EVERYTHING)
    for fullpath in list(status):
        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.debug('%s is gone, no longer in manifest', gitdir)
            continue

        # Skip repos whose nextcheck date is still in the future, unless
        # --force (or one of the modes that implies it) was given.
        nextcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'],
                                               '%Y-%m-%d')
        if nextcheck > today and not force:
            logger.debug('%s not yet due to be checked (nextcheck: %s)', fullpath,
                         status[fullpath]['nextcheck'])
            continue

        logger.info('%s:', fullpath)
        # Calculate elapsed seconds
        startt = time.time()

        # Do we need to repack/prune it?
        do_repack = True
        fpr = grokmirror.get_repo_fingerprint(config['toplevel'], gitdir, force=True)

        if conn_only and not (repack_all_quick or repack_all_full):
            do_repack = False
        else:
            # Did the fingerprint change since last time we repacked?
            oldfpr = None
            if 'fingerprint' in status[fullpath].keys():
                oldfpr = status[fullpath]['fingerprint']

            if fpr == oldfpr and not repack_all_full:
                do_repack = False
                logger.info(' repack : skipped, unchanged since last run')

        # do we need to fsck it?
        do_fsck = True
        if (repack_all_quick or repack_all_full) and not conn_only:
            do_fsck = False

        if do_repack:
            full_repack = repack_all_full
            if 'quick_repack_count' not in status[fullpath].keys():
                status[fullpath]['quick_repack_count'] = 0

            quick_repack_count = status[fullpath]['quick_repack_count']
            if 'full_repack_every' in config.keys():
                # but did you set 'full_repack_flags' as well?
                if 'full_repack_flags' not in config.keys():
                    logger.critical('full_repack_every is set, but not full_repack_flags')
                else:
                    full_repack_every = int(config['full_repack_every'])
                    # is it anything insane?
                    if full_repack_every < 2:
                        full_repack_every = 2
                        logger.warning('full_repack_every is too low, forced to 2')

                    # is it time to trigger full repack?
                    # We -1 because if we want a repack every 10th time, then we need to trigger
                    # when current repack count is 9.
                    if quick_repack_count >= full_repack_every-1:
                        logger.debug('Time to do full repack on %s',
                                     fullpath)
                        full_repack = True
                        quick_repack_count = 0
                        status[fullpath]['lastfullrepack'] = todayiso
                    else:
                        logger.debug('Repack count for %s not yet reached '
                                     'full repack trigger', fullpath)
                        quick_repack_count += 1

            repack_ok = run_git_repack(fullpath, config, full_repack)
            # Always bind prune_ok; the previous code only assigned it when
            # repack_ok was True and relied on boolean short-circuiting to
            # dodge a NameError below.
            prune_ok = repack_ok and run_git_prune(fullpath, config, manifest)

            if repack_ok and prune_ok:
                status[fullpath]['lastrepack'] = todayiso
                status[fullpath]['quick_repack_count'] = quick_repack_count
            else:
                logger.warning('Repacking %s was unsuccessful, '
                               'please run fsck manually!', gitdir)

        # We fsck last, after repacking and pruning
        if do_fsck:
            run_git_fsck(fullpath, config, conn_only)

        total_checked += 1

        endt = time.time()

        total_elapsed += endt-startt

        status[fullpath]['fingerprint'] = fpr
        status[fullpath]['lastcheck'] = todayiso
        status[fullpath]['s_elapsed'] = int(endt - startt)

        if force:
            # Use randomization for next check, again
            delay = random.randint(1, frequency)
        else:
            delay = frequency

        nextdate = today + datetime.timedelta(days=delay)
        status[fullpath]['nextcheck'] = nextdate.strftime('%F')
        logger.info(' done : %ss, next check on %s',
                    status[fullpath]['s_elapsed'],
                    status[fullpath]['nextcheck'])

        # Write status file after each check, so if the process dies, we won't
        # have to recheck all the repos we've already checked
        logger.debug('Updating status file in %s', config['statusfile'])
        with open(config['statusfile'], 'wb') as stfh:
            stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    if not total_checked:
        logger.info('No new repos to check.')
    else:
        logger.info('Repos checked: %s', total_checked)
        logger.info('Total running time: %s s', int(total_elapsed))
        # Final write also persists deletions of repos gone from the manifest
        with open(config['statusfile'], 'wb') as stfh:
            stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    lockf(flockh, LOCK_UN)
    flockh.close()
| |
| |
def parse_args():
    """Parse command-line options for grok-fsck.

    Exits with a usage error when both repack modes are requested at once
    or when no config file is given.

    :returns: (opts, args) tuple as produced by OptionParser.parse_args()
    """
    from optparse import OptionParser

    usage = '''usage: %prog -c fsck.conf
Run a git-fsck check on grokmirror-managed repositories.
'''

    parser = OptionParser(usage=usage, version=grokmirror.VERSION)
    parser.add_option(
        '-v', '--verbose', dest='verbose', action='store_true', default=False,
        help='Be verbose and tell us what you are doing')
    parser.add_option(
        '-f', '--force', dest='force', action='store_true', default=False,
        help='Force immediate run on all repositories.')
    parser.add_option(
        '-c', '--config', dest='config',
        help='Location of fsck.conf')
    parser.add_option(
        '--connectivity', dest='conn_only', action='store_true', default=False,
        help='(Assumes --force): Run git fsck on all repos, but only check connectivity')
    parser.add_option(
        '--repack-all-quick', dest='repack_all_quick', action='store_true',
        default=False,
        help='(Assumes --force): Do a quick repack of all repos')
    parser.add_option(
        '--repack-all-full', dest='repack_all_full', action='store_true',
        default=False,
        help='(Assumes --force): Do a full repack of all repos')

    opts, args = parser.parse_args()

    # The two repack modes use mutually exclusive flag sets
    if opts.repack_all_quick and opts.repack_all_full:
        parser.error('Pick either --repack-all-full or --repack-all-quick')

    if not opts.config:
        parser.error('You must provide the path to the config file')

    return opts, args
| |
| |
def grok_fsck(config, verbose=False, force=False, conn_only=False,
              repack_all_quick=False, repack_all_full=False):
    """Read the fsck config file and run fsck_mirror() for each section.

    :param config: path to the fsck.conf file
    :param verbose: passed through to fsck_mirror
    :param force: passed through to fsck_mirror
    :param conn_only: passed through to fsck_mirror
    :param repack_all_quick: passed through to fsck_mirror
    :param repack_all_full: passed through to fsck_mirror
    """
    try:
        from configparser import ConfigParser
    except ImportError:
        # Python 2 fallback
        from ConfigParser import ConfigParser

    ini = ConfigParser()
    ini.read(config)

    for section in ini.sections():
        # Build a plain dict per section. Use a name distinct from the
        # "config" parameter (the file path) instead of shadowing it as
        # the previous code did.
        secconfig = dict(ini.items(section))

        if 'ignore_errors' not in secconfig:
            # Sensible defaults covering known-benign git messages
            secconfig['ignore_errors'] = [
                'notice: HEAD points to an unborn branch',
                'notice: No default references',
                'contains zero-padded file modes',
                'warning: disabling bitmap writing, as some objects are not being packed',
                'ignoring extra bitmap file'
            ]
        else:
            # Config value is a newline-separated list; drop blank entries
            secconfig['ignore_errors'] = [
                estring.strip()
                for estring in secconfig['ignore_errors'].split('\n')
                if estring.strip()
            ]

        fsck_mirror(section, secconfig, verbose, force, conn_only,
                    repack_all_quick, repack_all_full)
| |
| |
def command():
    """Command-line entry point: parse options and run grok_fsck."""
    opts, _ = parse_args()
    return grok_fsck(opts.config, opts.verbose, opts.force, opts.conn_only,
                     opts.repack_all_quick, opts.repack_all_full)


if __name__ == '__main__':
    command()