| # -*- coding: utf-8 -*- |
| # Copyright (C) 2013-2018 by The Linux Foundation and contributors |
| # |
| # This program is free software: you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation, either version 3 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| |
| import os |
| import sys |
| |
| import grokmirror |
| import logging |
| try: |
| import urllib.request as urllib_request |
| from urllib.error import HTTPError, URLError |
| from urllib.parse import urlparse |
| except ImportError: |
| import urllib2 as urllib_request |
| from urllib2 import HTTPError, URLError |
| from urlparse import urlparse |
| |
| import ssl |
| import time |
| import gzip |
| import anyjson |
| import fnmatch |
| import subprocess |
| import shutil |
| import calendar |
| |
| import threading |
| try: |
| from queue import Queue |
| except ImportError: |
| from Queue import Queue |
| |
| from io import BytesIO |
| |
| from git import Repo |
| |
| import enlighten |
| |
| # default basic logger. We override it later. |
| logger = logging.getLogger(__name__) |
| |
| # We use it to bluntly track if there were any repos we couldn't lock |
| lock_fails = [] |
| # The same repos that didn't clone/pull successfully |
| git_fails = [] |
| # The same for repos that didn't verify successfully |
| verify_fails = [] |
| |
| |
class PullerThread(threading.Thread):
    """Worker thread that updates mirrored repositories from a work queue.

    Each item on in_queue is a (gitdir, fingerprint, modified) tuple as
    found in the remote manifest. The thread locks the repository,
    decides whether an update is actually needed (by fingerprint when
    the manifest provides one, by timestamp otherwise), pulls it, runs
    the post_update_hook, and reports a (gitdir, fingerprint, success)
    tuple on out_queue.
    """

    def __init__(self, in_queue, out_queue, config, thread_name, e_bar):
        threading.Thread.__init__(self)
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.toplevel = config['toplevel']
        self.hookscript = config['post_update_hook']
        self.myname = thread_name
        # enlighten progress bar shared across all puller threads
        self.e_bar = e_bar

    def run(self):
        # XXX: This is not thread-safe, but okay for now,
        # as we only use this for very blunt throttling
        global lock_fails
        global git_fails
        while True:
            (gitdir, fingerprint, modified) = self.in_queue.get()
            self.e_bar.refresh()
            # Do we still need to update it, or has another process
            # already done this for us?
            todo = True
            success = False
            # NOTE: fixed 'figerprint' typo in the debug message
            logger.debug('[Thread-%s] gitdir=%s, fingerprint=%s, modified=%s',
                         self.myname, gitdir, fingerprint, modified)

            fullpath = os.path.join(self.toplevel, gitdir.lstrip('/'))

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
                # First, get fingerprint as reported in grokmirror.fingerprint
                my_fingerprint = grokmirror.get_repo_fingerprint(
                    self.toplevel, gitdir, force=False)

                # We never rely on timestamps if fingerprints are in play
                if fingerprint is None:
                    ts = grokmirror.get_repo_timestamp(self.toplevel, gitdir)
                    if ts >= modified:
                        logger.debug('[Thread-%s] TS same or newer, '
                                     'not pulling %s', self.myname, gitdir)
                        todo = False
                else:
                    # Recheck the real fingerprint to make sure there is no
                    # divergence between grokmirror.fingerprint and real repo
                    logger.debug('[Thread-%s] Rechecking fingerprint in %s',
                                 self.myname, gitdir)
                    my_fingerprint = grokmirror.get_repo_fingerprint(
                        self.toplevel, gitdir, force=True)

                    # Update the fingerprint stored in-repo
                    grokmirror.set_repo_fingerprint(
                        self.toplevel, gitdir, fingerprint=my_fingerprint)

                    if fingerprint == my_fingerprint:
                        logger.debug('[Thread-%s] FP match, not pulling %s',
                                     self.myname, gitdir)
                        todo = False

                if not todo:
                    logger.debug('[Thread-%s] %s already latest, skipping',
                                 self.myname, gitdir)
                    set_agefile(self.toplevel, gitdir, modified)
                    grokmirror.unlock_repo(fullpath)
                    self.out_queue.put((gitdir, my_fingerprint, True))
                    self.in_queue.task_done()
                    continue

                logger.info('[Thread-%s] updating %s', self.myname, gitdir)
                success = pull_repo(self.toplevel, gitdir, threadid=self.myname)
                logger.debug('[Thread-%s] done pulling %s',
                             self.myname, gitdir)

                if success:
                    set_agefile(self.toplevel, gitdir, modified)
                    run_post_update_hook(self.hookscript, self.toplevel, gitdir,
                                         threadid=self.myname)
                else:
                    logger.warning('[Thread-%s] pulling %s unsuccessful',
                                   self.myname, gitdir)
                    git_fails.append(gitdir)

                # Record our current fingerprint and return it
                my_fingerprint = grokmirror.set_repo_fingerprint(
                    self.toplevel, gitdir)

                grokmirror.unlock_repo(fullpath)
            except IOError:
                # Could not take the nonblocking lock -- another process
                # is working on this repo; report the manifest fingerprint
                # unchanged and skip it for this run.
                my_fingerprint = fingerprint
                logger.info('[Thread-%s] Could not lock %s, skipping',
                            self.myname, gitdir)
                lock_fails.append(gitdir)

            self.out_queue.put((gitdir, my_fingerprint, success))
            self.e_bar.update()
            self.in_queue.task_done()
| |
| |
def cull_manifest(manifest, config):
    """Filter the remote manifest down to the repos this mirror wants.

    A gitdir is kept when it matches at least one of the
    newline-separated fnmatch patterns in config['include'] and none of
    the patterns in config['exclude'].

    :param manifest: dict mapping gitdir paths to manifest entries
    :param config: config dict with 'include' and 'exclude' keys
    :returns: new dict with only the matching manifest entries
    """
    includes = config['include'].split('\n')
    excludes = config['exclude'].split('\n')

    culled = {}
    for gitdir, entry in manifest.items():
        # included by at least one pattern, excluded by none
        if (any(fnmatch.fnmatch(gitdir, inc) for inc in includes)
                and not any(fnmatch.fnmatch(gitdir, exc) for exc in excludes)):
            culled[gitdir] = entry

    return culled
| |
| |
def fix_remotes(gitdir, toplevel, site):
    """Drop every configured remote and point a fresh mirror 'origin' at site."""
    repo = Repo(os.path.join(toplevel, gitdir.lstrip('/')))
    current = repo.git.remote()
    if current.strip():
        logger.debug('existing remotes: %s', current)
        for remote_name in current.split('\n'):
            logger.debug('\tremoving remote: %s', remote_name)
            repo.git.remote('rm', remote_name)

    # set my origin
    new_origin = os.path.join(site, gitdir.lstrip('/'))
    repo.git.remote('add', '--mirror', 'origin', new_origin)
    logger.debug('\tset new origin as %s', new_origin)
| |
| |
def set_repo_params(toplevel, gitdir, owner, description, reference):
    """Apply owner/description/alternates settings to a mirrored repo.

    Any parameter passed as None is left at its current (git default)
    value. If all three are None, the repo is not touched at all.
    """
    if owner is None and description is None and reference is None:
        # Let the default git values be there, then
        return

    fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
    repo = Repo(fullpath)

    # Make sure the repo is set as gc.auto=0, because running auto-gc
    # on a repo that has alternates to other repos can result in
    # corruption. We run our own gc inside the grok-fsck process that
    # is aware of alternates and won't blow things up.
    repo.git.config('gc.auto', '0')

    if description is not None:
        try:
            # Only touch the description file when it actually changed.
            if repo.description != description:
                logger.debug('Setting %s description to: %s',
                             gitdir, description)
                repo.description = description
        except IOError:
            # Bug in git-python will throw an exception if description
            # file is not found
            logger.debug('%s description file missing, setting to: %s',
                         gitdir, description)
            repo.description = description

    if owner is not None:
        # gitweb reads the owner from git config, not from a file
        logger.debug('Setting %s owner to: %s', gitdir, owner)
        repo.git.config('gitweb.owner', owner)

    if reference is not None:
        # XXX: Removing alternates involves git repack, so we don't support it
        # at this point. We also cowardly refuse to change an existing
        # alternates entry, as this has high chance of resulting in
        # broken git repositories. Only do this when we're going from
        # none to some value.
        if len(repo.alternates) > 0:
            return

        # Write the alternates file by hand, pointing at the reference
        # repo's objects directory under our own toplevel.
        objects = os.path.join(toplevel, reference.lstrip('/'), 'objects')
        altfile = os.path.join(fullpath, 'objects', 'info', 'alternates')
        logger.info('Setting %s alternates to: %s', gitdir, objects)
        with open(altfile, 'wt') as altfh:
            altfh.write('%s\n' % objects)
| |
| |
def set_agefile(toplevel, gitdir, last_modified):
    """Record last_modified as the repo timestamp and the cgit agefile.

    :param toplevel: local top-level directory holding all mirrored repos
    :param gitdir: repository path relative to toplevel
    :param last_modified: epoch timestamp of the last upstream change
    """
    grokmirror.set_repo_timestamp(toplevel, gitdir, last_modified)

    # set agefile, which can be used by cgit to show idle times
    # cgit recommends it to be yyyy-mm-dd hh:mm:ss
    cgit_fmt = time.strftime('%F %T', time.localtime(last_modified))
    agefile = os.path.join(toplevel, gitdir.lstrip('/'),
                           'info/web/last-modified')
    try:
        # Create leading dirs if needed. EAFP instead of an exists()
        # check: multiple puller threads may race on the same hierarchy,
        # and exists-then-makedirs can fail in the window between them.
        os.makedirs(os.path.dirname(agefile))
    except OSError:
        # Already exists (or is otherwise uncreatable -- in which case
        # the open() below will raise a clearer error anyway)
        pass
    with open(agefile, 'wt') as fh:
        fh.write('%s\n' % cgit_fmt)
    logger.debug('Wrote "%s" into %s', cgit_fmt, agefile)
| |
| |
def run_post_update_hook(hookscript, toplevel, gitdir, threadid='X'):
    """Run the configured post_update_hook for a just-updated repo.

    The hook is invoked with the repo's full path as its only argument.
    Does nothing if no hook is configured; warns and returns if the
    hook is not executable. Hook stderr is logged as warning, stdout
    as info, and a nonzero exit status is reported explicitly.
    """
    if hookscript == '':
        return
    if not os.access(hookscript, os.X_OK):
        logger.warning('[Thread-%s] post_update_hook %s is not executable',
                       threadid, hookscript)
        return

    fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
    args = [hookscript, fullpath]
    logger.debug('[Thread-%s] Running: %s', threadid, ' '.join(args))
    child = subprocess.Popen(args, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    (output, error) = child.communicate()

    error = error.decode().strip()
    output = output.decode().strip()
    if child.returncode != 0:
        # Previously the exit status was discarded, so a failing hook
        # that printed nothing to stderr went completely unnoticed.
        logger.warning('[Thread-%s] Hook exited with code %s', threadid,
                       child.returncode)
    if error:
        # Put hook stderror into warning
        logger.warning('[Thread-%s] Hook Stderr: %s', threadid, error)
    if output:
        # Put hook stdout into info
        logger.info('[Thread-%s] Hook Stdout: %s', threadid, output)
| |
| |
def pull_repo(toplevel, gitdir, threadid='X'):
    """Run 'git remote update --prune' on a mirrored repo.

    Returns True when git exits cleanly, False otherwise. Expected
    fetch chatter on stderr goes to debug; everything else is logged
    as a warning.
    """
    fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
    retcode, output, error = grokmirror.run_git_command(
        fullpath, ['remote', 'update', '--prune'])

    success = (retcode == 0)

    if error:
        # Split stderr into recognized fetch output and real warnings
        quiet = []
        loud = []
        for line in error.split('\n'):
            if line.find('From ') == 0 or line.find('-> ') > 0:
                quiet.append(line)
            else:
                loud.append(line)
        if quiet:
            logger.debug('[Thread-%s] Stderr: %s', threadid, '\n'.join(quiet))
        if loud:
            logger.warning('[Thread-%s] Stderr: %s', threadid, '\n'.join(loud))

    return success
| |
| |
def clone_repo(toplevel, gitdir, site, reference=None):
    """Clone gitdir from site into toplevel as a bare mirror.

    :param toplevel: local top-level directory holding all mirrored repos
    :param gitdir: repository path relative to both toplevel and site
    :param site: remote site URL (or path) to clone from
    :param reference: optional repo path (relative to toplevel) whose
                      objects are borrowed via git clone --reference
    :returns: True if git exited with status 0, False otherwise
    """
    source = os.path.join(site, gitdir.lstrip('/'))
    dest = os.path.join(toplevel, gitdir.lstrip('/'))

    args = ['clone', '--mirror']
    if reference is not None:
        reference = os.path.join(toplevel, reference.lstrip('/'))
        args.append('--reference')
        args.append(reference)

    args.append(source)
    args.append(dest)

    logger.info('Cloning %s into %s', source, dest)
    if reference is not None:
        logger.info('With reference to %s', reference)

    retcode, output, error = grokmirror.run_git_command(None, args)

    success = False
    if retcode == 0:
        success = True

    if error:
        # Put things we recognize into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            if line.find('cloned an empty repository') > 0:
                debug.append(line)
            # Fixed: this was a plain "if", which made lines matching
            # 'cloned an empty repository' fall into the else branch of
            # this second check and get reported as warnings too.
            elif line.find('into bare repository') > 0:
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.warning('Stderr: %s', '\n'.join(warn))

    return success
| |
| |
def clone_order(to_clone, manifest, to_clone_sorted, existing):
    """Append gitdirs to to_clone_sorted so reference repos come first.

    Repos whose manifest 'reference' is already present (in existing or
    earlier in to_clone_sorted, or is None) are appended immediately;
    the rest are retried recursively. When no further progress can be
    made, any remaining unresolved repos are appended as-is.
    """
    unresolved = []
    batch_size = len(to_clone)
    logger.debug('Another clone_order loop')
    for gitdir in to_clone:
        ref = manifest[gitdir]['reference']
        logger.debug('reference: %s', ref)
        if ref is None or ref in existing or ref in to_clone_sorted:
            logger.debug('%s: reference found in existing', gitdir)
            to_clone_sorted.append(gitdir)
        else:
            logger.debug('%s: reference not found', gitdir)
            unresolved.append(gitdir)

    if not unresolved or len(unresolved) == batch_size:
        # we can resolve no more dependencies, break out
        logger.debug('Finished resolving dependencies, quitting')
        if unresolved:
            logger.debug('Unresolved: %s', unresolved)
        to_clone_sorted.extend(unresolved)
        return

    logger.debug('Going for another clone_order loop')
    clone_order(unresolved, manifest, to_clone_sorted, existing)
| |
| |
def write_projects_list(manifest, config):
    """Write a cgit/gitweb-style projects list file from the manifest.

    Honors these optional config keys:
      - projectslist: output path (function is a no-op if absent/empty)
      - projectslist_trimtop: prefix to strip from each gitdir
      - projectslist_symlinks: 'yes' to also list each repo's symlinks

    The list is written to a temp file in the target directory and
    atomically moved into place; the temp file is removed on failure.
    """
    import tempfile
    import shutil

    if 'projectslist' not in config.keys():
        return

    if config['projectslist'] == '':
        return

    plpath = config['projectslist']
    trimtop = ''

    if 'projectslist_trimtop' in config.keys():
        trimtop = config['projectslist_trimtop']

    add_symlinks = False
    if ('projectslist_symlinks' in config.keys()
            and config['projectslist_symlinks'] == 'yes'):
        add_symlinks = True

    (dirname, basename) = os.path.split(plpath)
    (fd, tmpfile) = tempfile.mkstemp(prefix=basename, dir=dirname)
    logger.info('Writing new %s', plpath)

    try:
        with open(tmpfile, 'wt') as fh:
            for gitdir in manifest:
                if trimtop and gitdir.startswith(trimtop):
                    pgitdir = gitdir[len(trimtop):]
                else:
                    pgitdir = gitdir

                # Always remove leading slash, otherwise cgit breaks
                pgitdir = pgitdir.lstrip('/')
                fh.write('%s\n' % pgitdir)

                if add_symlinks and 'symlinks' in manifest[gitdir]:
                    # Do the same for symlinks
                    # XXX: Should make this configurable, perhaps
                    for symlink in manifest[gitdir]['symlinks']:
                        if trimtop and symlink.startswith(trimtop):
                            symlink = symlink[len(trimtop):]

                        symlink = symlink.lstrip('/')
                        fh.write('%s\n' % symlink)

        # The with-block has closed the file; previously an explicit
        # fh.close() plus chmod/move happened inside it, moving the
        # file into place while the handle was still open.
        # Set mode to current umask before the atomic rename.
        curmask = os.umask(0)
        os.chmod(tmpfile, 0o0666 ^ curmask)
        os.umask(curmask)
        shutil.move(tmpfile, plpath)

    finally:
        # If something failed, don't leave tempfiles trailing around
        if os.path.exists(tmpfile):
            os.unlink(tmpfile)
| |
| |
| def pull_mirror(name, config, verbose=False, force=False, nomtime=False, |
| verify=False, verify_subpath='*', noreuse=False, |
| purge=False, pretty=False, forcepurge=False): |
| global logger |
| global lock_fails |
| |
| # noinspection PyTypeChecker |
| em = enlighten.get_manager(series=' -=#') |
| |
| logger = logging.getLogger(name) |
| logger.setLevel(logging.DEBUG) |
| |
| if 'log' in config.keys(): |
| ch = logging.FileHandler(config['log']) |
| formatter = logging.Formatter( |
| "[%(process)d] %(asctime)s - %(levelname)s - %(message)s") |
| ch.setFormatter(formatter) |
| loglevel = logging.INFO |
| |
| if 'loglevel' in config.keys(): |
| if config['loglevel'] == 'debug': |
| loglevel = logging.DEBUG |
| |
| ch.setLevel(loglevel) |
| logger.addHandler(ch) |
| |
| ch = logging.StreamHandler() |
| formatter = logging.Formatter('%(message)s') |
| ch.setFormatter(formatter) |
| |
| if verbose: |
| ch.setLevel(logging.INFO) |
| else: |
| ch.setLevel(logging.CRITICAL) |
| em.enabled = False |
| |
| logger.addHandler(ch) |
| |
| # push it into grokmirror to override the default logger |
| grokmirror.logger = logger |
| |
| logger.info('Checking [%s]', name) |
| mymanifest = config['mymanifest'] |
| |
| if verify: |
| logger.info('Verifying mirror against %s', config['manifest']) |
| nomtime = True |
| |
| if config['manifest'].find('file:///') == 0: |
| manifile = config['manifest'].replace('file://', '') |
| if not os.path.exists(manifile): |
| logger.critical('Remote manifest not found in %s! Quitting!', |
| config['manifest']) |
| return 1 |
| |
| fstat = os.stat(manifile) |
| last_modified = fstat[8] |
| logger.debug('mtime on %s is: %s', manifile, fstat[8]) |
| |
| if os.path.exists(config['mymanifest']): |
| fstat = os.stat(config['mymanifest']) |
| my_last_modified = fstat[8] |
| logger.debug('Our last-modified is: %s', my_last_modified) |
| if not (force or nomtime) and last_modified <= my_last_modified: |
| logger.info('Manifest file unchanged. Quitting.') |
| return 0 |
| |
| logger.info('Reading new manifest from %s', manifile) |
| manifest = grokmirror.read_manifest(manifile) |
| # Don't accept empty manifests -- that indicates something is wrong |
| if not len(manifest.keys()): |
| logger.warning('Remote manifest empty or unparseable! Quitting.') |
| return 1 |
| |
| else: |
| # Load it from remote host using http and header magic |
| logger.info('Fetching remote manifest from %s', config['manifest']) |
| |
| # Do we have username:password@ in the URL? |
| chunks = urlparse(config['manifest']) |
| if chunks.netloc.find('@') > 0: |
| logger.debug('Taking username/password from the URL for basic auth') |
| (upass, netloc) = chunks.netloc.split('@') |
| if upass.find(':') > 0: |
| (username, password) = upass.split(':') |
| else: |
| username = upass |
| password = '' |
| |
| manifesturl = config['manifest'].replace(chunks.netloc, netloc) |
| logger.debug('manifesturl=%s', manifesturl) |
| request = urllib_request.Request(manifesturl) |
| |
| password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm() |
| password_mgr.add_password(None, manifesturl, username, password) |
| auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr) |
| opener = urllib_request.build_opener(auth_handler) |
| |
| else: |
| request = urllib_request.Request(config['manifest']) |
| opener = urllib_request.build_opener() |
| |
| # Find out if we need to run at all first |
| if not (force or nomtime) and os.path.exists(mymanifest): |
| fstat = os.stat(mymanifest) |
| mtime = fstat[8] |
| logger.debug('mtime on %s is: %s', mymanifest, mtime) |
| my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT', |
| time.gmtime(mtime)) |
| logger.debug('Our last-modified is: %s', my_last_modified) |
| request.add_header('If-Modified-Since', my_last_modified) |
| |
| try: |
| ufh = opener.open(request, timeout=30) |
| except HTTPError as ex: |
| if ex.code == 304: |
| logger.info('Server says we have the latest manifest. ' |
| 'Quitting.') |
| return 0 |
| logger.warning('Could not fetch %s', config['manifest']) |
| logger.warning('Server returned: %s', ex) |
| return 1 |
| except (URLError, ssl.SSLError, ssl.CertificateError) as ex: |
| logger.warning('Could not fetch %s', config['manifest']) |
| logger.warning('Error was: %s', ex) |
| return 1 |
| |
| last_modified = ufh.headers.get('Last-Modified') |
| last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z') |
| last_modified = calendar.timegm(last_modified) |
| |
| # We don't use read_manifest for the remote manifest, as it can be |
| # anything, really. For now, blindly open it with gzipfile if it ends |
| # with .gz. XXX: some http servers will auto-deflate such files. |
| try: |
| if config['manifest'].find('.gz') > 0: |
| fh = gzip.GzipFile(fileobj=BytesIO(ufh.read())) |
| else: |
| fh = ufh |
| |
| jdata = fh.read().decode('utf-8') |
| fh.close() |
| |
| manifest = anyjson.deserialize(jdata) |
| |
| except Exception as ex: |
| logger.warning('Failed to parse %s', config['manifest']) |
| logger.warning('Error was: %s', ex) |
| return 1 |
| |
| mymanifest = grokmirror.read_manifest(mymanifest) |
| |
| culled = cull_manifest(manifest, config) |
| |
| to_clone = [] |
| to_pull = [] |
| existing = [] |
| |
| toplevel = config['toplevel'] |
| if not os.access(toplevel, os.W_OK): |
| logger.critical('Toplevel %s does not exist or is not writable', |
| toplevel) |
| sys.exit(1) |
| |
| if 'pull_threads' in config.keys(): |
| pull_threads = int(config['pull_threads']) |
| if pull_threads < 1: |
| logger.info('pull_threads is less than 1, forcing to 1') |
| pull_threads = 1 |
| else: |
| # be conservative |
| logger.info('pull_threads is not set, consider setting it') |
| pull_threads = 5 |
| |
| # noinspection PyTypeChecker |
| e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False) |
| |
| for gitdir in list(culled): |
| fullpath = os.path.join(toplevel, gitdir.lstrip('/')) |
| e_cmp.update() |
| |
| # fingerprints were added in later versions, so deal if the upstream |
| # manifest doesn't have a fingerprint |
| if 'fingerprint' not in culled[gitdir]: |
| culled[gitdir]['fingerprint'] = None |
| |
| # Attempt to lock the repo |
| try: |
| grokmirror.lock_repo(fullpath, nonblocking=True) |
| except IOError: |
| logger.info('Could not lock %s, skipping', gitdir) |
| lock_fails.append(gitdir) |
| # Force the fingerprint to what we have in mymanifest, |
| # if we have it. |
| culled[gitdir]['fingerprint'] = None |
| if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]: |
| culled[gitdir]['fingerprint'] = mymanifest[gitdir][ |
| 'fingerprint'] |
| if len(lock_fails) >= pull_threads: |
| logger.info('Too many repositories locked (%s). Exiting.', |
| len(lock_fails)) |
| return 0 |
| continue |
| |
| if verify: |
| if culled[gitdir]['fingerprint'] is None: |
| logger.debug('No fingerprint for %s, not verifying', gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| if not fnmatch.fnmatch(gitdir, verify_subpath): |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| logger.debug('Verifying %s', gitdir) |
| if not os.path.exists(fullpath): |
| verify_fails.append(gitdir) |
| logger.info('Verify: %s ABSENT', gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| my_fingerprint = grokmirror.get_repo_fingerprint( |
| toplevel, gitdir, force=force) |
| |
| if my_fingerprint == culled[gitdir]['fingerprint']: |
| logger.info('Verify: %s OK', gitdir) |
| else: |
| logger.critical('Verify: %s FAILED', gitdir) |
| verify_fails.append(gitdir) |
| |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| # Is the directory in place? |
| if os.path.exists(fullpath): |
| # Did grok-fsck request to reclone it? |
| rfile = os.path.join(fullpath, 'grokmirror.reclone') |
| if os.path.exists(rfile): |
| logger.info('Reclone requested for %s:', gitdir) |
| with open(rfile, 'r') as rfh: |
| reason = rfh.read() |
| logger.info(' %s', reason) |
| |
| to_clone.append(gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| # Fix owner and description, if necessary |
| if gitdir in mymanifest.keys(): |
| # This code is hurky and needs to be cleaned up |
| desc = culled[gitdir].get('description') |
| owner = culled[gitdir].get('owner') |
| ref = None |
| if config['ignore_repo_references'] != 'yes': |
| ref = culled[gitdir].get('reference') |
| |
| # dirty hack to force on-disk owner/description checks |
| # when we're called with -n, in case our manifest |
| # differs from what is on disk for owner/description/alternates |
| myref = None |
| if nomtime: |
| mydesc = None |
| myowner = None |
| else: |
| mydesc = mymanifest[gitdir].get('description') |
| myowner = mymanifest[gitdir].get('owner') |
| |
| if config['ignore_repo_references'] != 'yes': |
| myref = mymanifest[gitdir].get('reference') |
| |
| if myowner is None: |
| myowner = config['default_owner'] |
| |
| if owner is None: |
| owner = config['default_owner'] |
| |
| if desc != mydesc or owner != myowner or ref != myref: |
| # we can do this right away without waiting |
| set_repo_params(toplevel, gitdir, owner, desc, ref) |
| |
| else: |
| # It exists on disk, but not in my manifest? |
| if noreuse: |
| logger.critical('Found existing git repo in %s', fullpath) |
| logger.critical('But you asked NOT to reuse repos') |
| logger.critical('Skipping %s', gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| logger.info('Setting new origin for %s', gitdir) |
| fix_remotes(gitdir, toplevel, config['site']) |
| to_pull.append(gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| # fingerprints were added late, so if we don't have them |
| # in the remote manifest, fall back on using timestamps |
| changed = False |
| if culled[gitdir]['fingerprint'] is not None: |
| logger.debug('Will use fingerprints to compare %s', gitdir) |
| my_fingerprint = grokmirror.get_repo_fingerprint(toplevel, |
| gitdir, |
| force=force) |
| |
| if my_fingerprint != culled[gitdir]['fingerprint']: |
| logger.debug('No fingerprint match, will pull %s', gitdir) |
| changed = True |
| else: |
| logger.debug('Fingerprints match, skipping %s', gitdir) |
| else: |
| logger.debug('Will use timestamps to compare %s', gitdir) |
| if force: |
| logger.debug('Will force-pull %s', gitdir) |
| changed = True |
| # set timestamp to 0 as well |
| grokmirror.set_repo_timestamp(toplevel, gitdir, 0) |
| else: |
| ts = grokmirror.get_repo_timestamp(toplevel, gitdir) |
| if ts < culled[gitdir]['modified']: |
| changed = True |
| |
| if changed: |
| to_pull.append(gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| else: |
| logger.debug('Repo %s unchanged', gitdir) |
| # if we don't have a fingerprint for it, add it now |
| if culled[gitdir]['fingerprint'] is None: |
| fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir) |
| culled[gitdir]['fingerprint'] = fpr |
| existing.append(gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| else: |
| # Newly incoming repo |
| to_clone.append(gitdir) |
| grokmirror.unlock_repo(fullpath) |
| continue |
| |
| # If we got here, something is odd. |
| # noinspection PyUnreachableCode |
| logger.critical('Could not figure out what to do with %s', gitdir) |
| grokmirror.unlock_repo(fullpath) |
| |
| logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed) |
| e_cmp.close() |
| |
| if verify: |
| if len(verify_fails): |
| logger.critical('%s repos failed to verify', len(verify_fails)) |
| return 1 |
| else: |
| logger.info('Verification successful') |
| return 0 |
| |
| hookscript = config['post_update_hook'] |
| |
| if len(to_pull): |
| |
| if len(lock_fails) > 0: |
| pull_threads -= len(lock_fails) |
| |
| # Don't spin up more threads than we need |
| if pull_threads > len(to_pull): |
| pull_threads = len(to_pull) |
| |
| # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra |
| # precaution doesn't hurt |
| if pull_threads <= 0: |
| logger.info('Too many repositories locked. Exiting.') |
| return 0 |
| |
| logger.info('Will use %d threads to pull repos', pull_threads) |
| |
| # noinspection PyTypeChecker |
| e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False) |
| logger.info('Updating %s repos from %s', len(to_pull), config['site']) |
| in_queue = Queue() |
| out_queue = Queue() |
| |
| for gitdir in to_pull: |
| in_queue.put((gitdir, culled[gitdir]['fingerprint'], |
| culled[gitdir]['modified'])) |
| |
| for i in range(pull_threads): |
| logger.debug('Spun up thread %s', i) |
| t = PullerThread(in_queue, out_queue, config, i, e_pull) |
| t.setDaemon(True) |
| t.start() |
| |
| # wait till it's all done |
| in_queue.join() |
| logger.info('All threads finished.') |
| |
| while not out_queue.empty(): |
| # see if any of it failed |
| (gitdir, my_fingerprint, status) = out_queue.get() |
| # We always record our fingerprint in our manifest |
| culled[gitdir]['fingerprint'] = my_fingerprint |
| if not status: |
| # To make sure we check this again during next run, |
| # fudge the manifest accordingly. |
| logger.debug('Will recheck %s during next run', gitdir) |
| culled[gitdir] = mymanifest[gitdir] |
| # this is rather hackish, but effective |
| last_modified -= 1 |
| |
| logger.info('Updates completed in %0.2fs', e_pull.elapsed) |
| e_pull.close() |
| else: |
| logger.info('No repositories need updating') |
| |
| # how many lockfiles have we seen? |
| # If there are more lock_fails than there are |
| # pull_threads configured, we skip cloning out of caution |
| if len(to_clone) and len(lock_fails) > pull_threads: |
| logger.info('Too many repositories locked. Skipping cloning new repos.') |
| to_clone = [] |
| |
| if len(to_clone): |
| # noinspection PyTypeChecker |
| e_clone = em.counter(total=len(to_clone), desc='Cloning :', unit='repos', leave=False) |
| logger.info('Cloning %s repos from %s', len(to_clone), config['site']) |
| # we use "existing" to track which repos can be used as references |
| existing.extend(to_pull) |
| |
| to_clone_sorted = [] |
| clone_order(to_clone, manifest, to_clone_sorted, existing) |
| |
| for gitdir in to_clone_sorted: |
| e_clone.refresh() |
| |
| fullpath = os.path.join(toplevel, gitdir.lstrip('/')) |
| |
| # Did grok-fsck request to reclone it? |
| rfile = os.path.join(fullpath, 'grokmirror.reclone') |
| if os.path.exists(rfile): |
| logger.debug('Removing %s for reclone', gitdir) |
| shutil.move(fullpath, '%s.reclone' % fullpath) |
| shutil.rmtree('%s.reclone' % fullpath) |
| |
| # Do we still need to clone it, or has another process |
| # already done this for us? |
| ts = grokmirror.get_repo_timestamp(toplevel, gitdir) |
| |
| if ts > 0: |
| logger.debug('Looks like %s already cloned, skipping', gitdir) |
| continue |
| |
| try: |
| grokmirror.lock_repo(fullpath, nonblocking=True) |
| except IOError: |
| logger.info('Could not lock %s, skipping', gitdir) |
| lock_fails.append(gitdir) |
| e_clone.update() |
| continue |
| |
| reference = None |
| if config['ignore_repo_references'] != 'yes': |
| reference = culled[gitdir]['reference'] |
| |
| if reference is not None and reference in existing: |
| # Make sure we can lock the reference repo |
| refrepo = os.path.join(toplevel, reference.lstrip('/')) |
| try: |
| grokmirror.lock_repo(refrepo, nonblocking=True) |
| success = clone_repo(toplevel, gitdir, config['site'], |
| reference=reference) |
| grokmirror.unlock_repo(refrepo) |
| except IOError: |
| logger.info('Cannot lock reference repo %s, skipping %s', |
| reference, gitdir) |
| if reference not in lock_fails: |
| lock_fails.append(reference) |
| |
| grokmirror.unlock_repo(fullpath) |
| e_clone.update() |
| continue |
| else: |
| success = clone_repo(toplevel, gitdir, config['site']) |
| |
| # check dir to make sure cloning succeeded and then add to existing |
| if os.path.exists(fullpath) and success: |
| logger.debug('Cloning of %s succeeded, adding to existing', |
| gitdir) |
| existing.append(gitdir) |
| |
| desc = culled[gitdir].get('description') |
| owner = culled[gitdir].get('owner') |
| ref = culled[gitdir].get('reference') |
| |
| if owner is None: |
| owner = config['default_owner'] |
| set_repo_params(toplevel, gitdir, owner, desc, ref) |
| set_agefile(toplevel, gitdir, culled[gitdir]['modified']) |
| my_fingerprint = grokmirror.set_repo_fingerprint(toplevel, |
| gitdir) |
| culled[gitdir]['fingerprint'] = my_fingerprint |
| run_post_update_hook(hookscript, toplevel, gitdir) |
| else: |
| logger.warning('Was not able to clone %s', gitdir) |
| # Remove it from our manifest so we can try re-cloning |
| # next time grok-pull runs |
| del culled[gitdir] |
| git_fails.append(gitdir) |
| |
| grokmirror.unlock_repo(fullpath) |
| e_clone.update() |
| |
| logger.info('Clones completed in %0.2fs' % e_clone.elapsed) |
| e_clone.close() |
| |
| else: |
| logger.info('No repositories need cloning') |
| |
| # loop through all entries and find any symlinks we need to set |
| # We also collect all symlinks to do purging correctly |
| symlinks = [] |
| for gitdir in culled.keys(): |
| if 'symlinks' in culled[gitdir].keys(): |
| source = os.path.join(config['toplevel'], gitdir.lstrip('/')) |
| for symlink in culled[gitdir]['symlinks']: |
| if symlink not in symlinks: |
| symlinks.append(symlink) |
| target = os.path.join(config['toplevel'], symlink.lstrip('/')) |
| |
| if os.path.exists(source): |
| if os.path.islink(target): |
| # are you pointing to where we need you? |
| if os.path.realpath(target) != source: |
| # Remove symlink and recreate below |
| logger.debug('Removed existing wrong symlink %s', |
| target) |
| os.unlink(target) |
| elif os.path.exists(target): |
| logger.warning('Deleted repo %s, because it is now' |
| ' a symlink to %s' % (target, source)) |
| shutil.rmtree(target) |
| |
| # Here we re-check if we still need to do anything |
| if not os.path.exists(target): |
| logger.info('Symlinking %s -> %s', target, source) |
| # Make sure the leading dirs are in place |
| if not os.path.exists(os.path.dirname(target)): |
| os.makedirs(os.path.dirname(target)) |
| os.symlink(source, target) |
| |
| manifile = config['mymanifest'] |
| grokmirror.manifest_lock(manifile) |
| |
| # Is the local manifest newer than last_modified? That would indicate |
| # that another process has run and "culled" is no longer the latest info |
| if os.path.exists(manifile): |
| fstat = os.stat(manifile) |
| if fstat[8] > last_modified: |
| logger.info('Local manifest is newer, not saving.') |
| grokmirror.manifest_unlock(manifile) |
| return 0 |
| |
| if purge: |
| to_purge = [] |
| found_repos = 0 |
| for founddir in grokmirror.find_all_gitdirs(config['toplevel']): |
| gitdir = founddir.replace(config['toplevel'], '') |
| found_repos += 1 |
| |
| if gitdir not in culled.keys() and gitdir not in symlinks: |
| to_purge.append(founddir) |
| |
| if len(to_purge): |
| # Purge-protection engage |
| try: |
| purge_limit = int(config['purgeprotect']) |
| assert 1 <= purge_limit <= 99 |
| except (ValueError, AssertionError): |
| logger.critical('Warning: "%s" is not valid for purgeprotect.', |
| config['purgeprotect']) |
| logger.critical('Please set to a number between 1 and 99.') |
| logger.critical('Defaulting to purgeprotect=5.') |
| purge_limit = 5 |
| |
| purge_pc = len(to_purge) * 100 / found_repos |
| logger.debug('purgeprotect=%s', purge_limit) |
| logger.debug('purge prercentage=%s', purge_pc) |
| |
| if not forcepurge and purge_pc >= purge_limit: |
| logger.critical('Refusing to purge %s repos (%s%%)', |
| len(to_purge), purge_pc) |
| logger.critical('Set purgeprotect to a higher percentage, or' |
| ' override with --force-purge.') |
| logger.info('Not saving local manifest') |
| return 1 |
| else: |
| # noinspection PyTypeChecker |
| e_purge = em.counter(total=len(to_purge), desc='Purging :', unit='repos', leave=False) |
| for founddir in to_purge: |
| e_purge.refresh() |
| if os.path.islink(founddir): |
| logger.info('Removing unreferenced symlink %s', gitdir) |
| os.unlink(founddir) |
| else: |
| # is anything using us for alternates? |
| gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/') |
| if grokmirror.is_alt_repo(toplevel, gitdir): |
| logger.info('Not purging %s because it is used by ' |
| 'other repos via alternates', founddir) |
| else: |
| try: |
| logger.info('Purging %s', founddir) |
| grokmirror.lock_repo(founddir, nonblocking=True) |
| shutil.rmtree(founddir) |
| except IOError: |
| lock_fails.append(gitdir) |
| logger.info('%s is locked, not purging', |
| gitdir) |
| e_purge.update() |
| |
| logger.info('Purging completed in %0.2fs', e_purge.elapsed) |
| e_purge.close() |
| |
| else: |
| logger.info('No repositories need purging') |
| |
| # Done with progress bars |
| em.stop() |
| |
| # Go through all repos in culled and get the latest local timestamps. |
| for gitdir in culled: |
| ts = grokmirror.get_repo_timestamp(toplevel, gitdir) |
| culled[gitdir]['modified'] = ts |
| |
| # If there were any lock failures, we fudge last_modified to always |
| # be older than the server, which will force the next grokmirror run. |
| if len(lock_fails): |
| logger.info('%s repos could not be locked. Forcing next run.', |
| len(lock_fails)) |
| last_modified -= 1 |
| elif len(git_fails): |
| logger.info('%s repos failed. Forcing next run.', len(git_fails)) |
| last_modified -= 1 |
| |
| # Once we're done, save culled as our new manifest |
| grokmirror.write_manifest(manifile, culled, mtime=last_modified, |
| pretty=pretty) |
| |
| grokmirror.manifest_unlock(manifile) |
| |
| # write out projects.list, if asked to |
| write_projects_list(culled, config) |
| |
| return 127 |
| |
| |
def parse_args():
    """Build the grok-pull command-line parser and parse sys.argv.

    Returns:
        The ``(opts, args)`` pair produced by ``OptionParser.parse_args()``.
        Aborts with a usage error if the mandatory ``-c``/``--config``
        option was not supplied.
    """
    from optparse import OptionParser

    usage = '''usage: %prog -c repos.conf
    Create a grok mirror using the repository configuration found in repos.conf
    '''

    op = OptionParser(usage=usage, version=grokmirror.VERSION)

    # (short, long, kwargs) triples for every supported flag; registering
    # them from a table keeps each option's definition on one screen line
    # group and makes additions mechanical.
    optspecs = (
        ('-v', '--verbose',
         dict(dest='verbose', action='store_true', default=False,
              help='Be verbose and tell us what you are doing')),
        ('-n', '--no-mtime-check',
         dict(dest='nomtime', action='store_true', default=False,
              help='Run without checking manifest mtime.')),
        ('-f', '--force',
         dict(dest='force', action='store_true', default=False,
              help='Force full git update regardless of last-modified time.'
                   ' Also useful when repos.conf has changed.')),
        ('-p', '--purge',
         dict(dest='purge', action='store_true', default=False,
              help='Remove any git trees that are no longer in manifest.')),
        # optparse quietly discards the empty short-option string.
        ('', '--force-purge',
         dict(dest='forcepurge', action='store_true', default=False,
              help='Force purge despite significant repo deletions.')),
        ('-y', '--pretty',
         dict(dest='pretty', action='store_true', default=False,
              help='Pretty-print manifest (sort keys and add indentation)')),
        ('-r', '--no-reuse-existing-repos',
         dict(dest='noreuse', action='store_true', default=False,
              help='If any existing repositories are found on disk, do NOT '
                   'update origin and reuse')),
        ('-m', '--verify-mirror',
         dict(dest='verify', action='store_true', default=False,
              help='Do not perform any updates, just verify that mirror '
                   'matches upstream manifest.')),
        ('-s', '--verify-subpath',
         dict(dest='verify_subpath', default='*',
              help='Only verify a subpath (accepts shell globbing)')),
        ('-c', '--config',
         dict(dest='config', help='Location of repos.conf')),
    )
    for short_opt, long_opt, kwargs in optspecs:
        op.add_option(short_opt, long_opt, **kwargs)

    opts, args = op.parse_args()

    if not opts.config:
        op.error('You must provide the path to the config file')

    return opts, args
| |
| |
def grok_pull(config, verbose=False, force=False, nomtime=False,
              verify=False, verify_subpath='*', noreuse=False,
              purge=False, pretty=False, forcepurge=False):
    """Run pull_mirror() once for every section in the repos.conf file.

    :param config: path to the repos.conf configuration file
    :returns: 0 when no section changed anything, 127 when at least one
              section modified its mirror, 1 when any section hit a
              fatal error (fatal wins over modified)
    """
    # The fail trackers are module-level so PullerThread and friends can
    # append to them; reset them per section below.
    global lock_fails
    global git_fails

    try:
        from configparser import ConfigParser
    except ImportError:  # Python 2 fallback
        from ConfigParser import ConfigParser

    ini = ConfigParser()
    ini.read(config)

    retval = 0

    for section in ini.sections():
        # Clean slate of failure trackers for each section
        lock_fails = []
        git_fails = []

        # Start from the defaults and let the section override them
        sect_config = {
            'default_owner': 'Grokmirror User',
            'post_update_hook': '',
            'include': '*',
            'exclude': '',
            'ignore_repo_references': 'no',
            'purgeprotect': '5',
        }
        sect_config.update(ini.items(section))

        sect_retval = pull_mirror(
            section, sect_config, verbose, force, nomtime, verify,
            verify_subpath, noreuse, purge, pretty, forcepurge)

        if sect_retval == 1:
            # Fatal error encountered at some point
            retval = 1
        elif sect_retval == 127 and retval != 1:
            # Successful run with contents modified; a previous fatal
            # error still takes precedence
            retval = 127

    return retval
| |
| |
def command():
    """Console-script entry point: parse args, run grok_pull, exit.

    The process exit code is grok_pull()'s return value (0, 1 or 127).
    """
    opts, _ = parse_args()

    sys.exit(grok_pull(opts.config, opts.verbose, opts.force, opts.nomtime,
                       opts.verify, opts.verify_subpath, opts.noreuse,
                       opts.purge, opts.pretty, opts.forcepurge))
| |
| |
# Allow running this module directly as a script (same entry point the
# installed grok-pull console script uses).
if __name__ == '__main__':
    command()