blob: 53b2fa021008276cb65b175d07407a717315d6fa [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright (C) 2013-2018 by The Linux Foundation and contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import grokmirror
import logging
try:
import urllib.request as urllib_request
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse
except ImportError:
import urllib2 as urllib_request
from urllib2 import HTTPError, URLError
from urlparse import urlparse
import ssl
import time
import gzip
import anyjson
import fnmatch
import subprocess
import shutil
import calendar
import threading
try:
from queue import Queue
except ImportError:
from Queue import Queue
from io import BytesIO
from git import Repo
import enlighten
# Default module-level logger. pull_mirror() rebinds it to a logger named
# after the section being processed.
logger = logging.getLogger(__name__)
# Repos we could not lock. Tracked bluntly (plain list appended to from
# several places) and used to throttle work and to force another run.
lock_fails = []
# Repos that did not clone/pull successfully
git_fails = []
# Repos that did not verify successfully
verify_fails = []
class PullerThread(threading.Thread):
    """Worker thread that updates repositories handed to it via a queue.

    Each item taken from ``in_queue`` is a ``(gitdir, fingerprint, modified)``
    tuple. For every item exactly one ``(gitdir, fingerprint, success)`` tuple
    is put on ``out_queue``, the progress bar is advanced once, and
    ``in_queue.task_done()`` is called once.
    """

    def __init__(self, in_queue, out_queue, config, thread_name, e_bar):
        """Store the queues, progress bar and the config values we need.

        :param in_queue: queue of ``(gitdir, fingerprint, modified)`` tuples
        :param out_queue: queue receiving ``(gitdir, fingerprint, success)``
        :param config: mapping with ``toplevel`` and ``post_update_hook`` keys
        :param thread_name: identifier used in log messages
        :param e_bar: progress counter exposing ``refresh()`` and ``update()``
        """
        threading.Thread.__init__(self)
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.toplevel = config['toplevel']
        self.hookscript = config['post_update_hook']
        self.myname = thread_name
        self.e_bar = e_bar

    def run(self):
        """Process queue items forever; the caller waits on in_queue.join()."""
        # XXX: This is not thread-safe, but okay for now,
        #      as we only use this for very blunt throttling
        global lock_fails
        global git_fails
        while True:
            (gitdir, fingerprint, modified) = self.in_queue.get()
            self.e_bar.refresh()
            # Do we still need to update it, or has another process
            # already done this for us?
            todo = True
            success = False
            logger.debug('[Thread-%s] gitdir=%s, figerprint=%s, modified=%s',
                         self.myname, gitdir, fingerprint, modified)
            fullpath = os.path.join(self.toplevel, gitdir.lstrip('/'))
            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
                # First, get fingerprint as reported in grokmirror.fingerprint
                my_fingerprint = grokmirror.get_repo_fingerprint(
                    self.toplevel, gitdir, force=False)
                # We never rely on timestamps if fingerprints are in play
                if fingerprint is None:
                    ts = grokmirror.get_repo_timestamp(self.toplevel, gitdir)
                    if ts >= modified:
                        logger.debug('[Thread-%s] TS same or newer, '
                                     'not pulling %s', self.myname, gitdir)
                        todo = False
                else:
                    # Recheck the real fingerprint to make sure there is no
                    # divergence between grokmirror.fingerprint and real repo
                    logger.debug('[Thread-%s] Rechecking fingerprint in %s',
                                 self.myname, gitdir)
                    my_fingerprint = grokmirror.get_repo_fingerprint(
                        self.toplevel, gitdir, force=True)
                    # Update the fingerprint stored in-repo
                    grokmirror.set_repo_fingerprint(
                        self.toplevel, gitdir, fingerprint=my_fingerprint)
                    if fingerprint == my_fingerprint:
                        logger.debug('[Thread-%s] FP match, not pulling %s',
                                     self.myname, gitdir)
                        todo = False
                if todo:
                    logger.info('[Thread-%s] updating %s',
                                self.myname, gitdir)
                    success = pull_repo(self.toplevel, gitdir,
                                        threadid=self.myname)
                    logger.debug('[Thread-%s] done pulling %s',
                                 self.myname, gitdir)
                    if success:
                        set_agefile(self.toplevel, gitdir, modified)
                        run_post_update_hook(self.hookscript, self.toplevel,
                                             gitdir, threadid=self.myname)
                    else:
                        logger.warning('[Thread-%s] pulling %s unsuccessful',
                                       self.myname, gitdir)
                        git_fails.append(gitdir)
                    # Record our current fingerprint and return it
                    my_fingerprint = grokmirror.set_repo_fingerprint(
                        self.toplevel, gitdir)
                else:
                    logger.debug('[Thread-%s] %s already latest, skipping',
                                 self.myname, gitdir)
                    set_agefile(self.toplevel, gitdir, modified)
                    # Nothing to pull counts as a successful update
                    success = True
                grokmirror.unlock_repo(fullpath)
            except IOError:
                my_fingerprint = fingerprint
                logger.info('[Thread-%s] Could not lock %s, skipping',
                            self.myname, gitdir)
                lock_fails.append(gitdir)
            # Common tail for every outcome, so that skipped repos also
            # advance the progress bar.
            self.out_queue.put((gitdir, my_fingerprint, success))
            self.e_bar.update()
            self.in_queue.task_done()
def cull_manifest(manifest, config):
    """Return the manifest entries selected by the include/exclude globs.

    ``config['include']`` and ``config['exclude']`` are newline-separated
    lists of shell-style patterns. An entry is kept when its key matches at
    least one include pattern and no exclude pattern. Values in the returned
    dict are the same objects as in ``manifest``.
    """
    include_globs = config['include'].split('\n')
    exclude_globs = config['exclude'].split('\n')

    def _matches_any(path, globs):
        return any(fnmatch.fnmatch(path, glob) for glob in globs)

    return {
        path: entry
        for path, entry in manifest.items()
        if _matches_any(path, include_globs)
        and not _matches_any(path, exclude_globs)
    }
def fix_remotes(gitdir, toplevel, site):
    """Drop every configured remote of a repo and add a mirror ``origin``.

    The repository lives at ``toplevel`` + ``gitdir``; the new origin URL is
    ``site`` joined with ``gitdir``.
    """
    relpath = gitdir.lstrip('/')
    repo = Repo(os.path.join(toplevel, relpath))

    # "git remote" prints one remote name per line
    listing = repo.git.remote()
    if listing.strip():
        logger.debug('existing remotes: %s', listing)
        for remote_name in listing.split('\n'):
            logger.debug('\tremoving remote: %s', remote_name)
            repo.git.remote('rm', remote_name)

    # Point origin at the upstream site
    new_origin = os.path.join(site, relpath)
    repo.git.remote('add', '--mirror', 'origin', new_origin)
    logger.debug('\tset new origin as %s', new_origin)
def set_repo_params(toplevel, gitdir, owner, description, reference):
    """Apply owner, description and alternates settings to a repository.

    Does nothing when all three values are None. Otherwise gc.auto is
    disabled and each non-None value is applied. Alternates are only written
    when the repository currently has none.
    """
    if all(value is None for value in (owner, description, reference)):
        # Let the default git values be there, then
        return

    repo_path = os.path.join(toplevel, gitdir.lstrip('/'))
    repo = Repo(repo_path)
    # Auto-gc on a repo that has alternates to other repos can result in
    # corruption, so force gc.auto=0. The grok-fsck process runs its own
    # alternates-aware gc instead.
    repo.git.config('gc.auto', '0')

    if description is not None:
        try:
            if repo.description != description:
                logger.debug('Setting %s description to: %s',
                             gitdir, description)
                repo.description = description
        except IOError:
            # Bug in git-python will throw an exception if description
            # file is not found
            logger.debug('%s description file missing, setting to: %s',
                         gitdir, description)
            repo.description = description

    if owner is not None:
        logger.debug('Setting %s owner to: %s', gitdir, owner)
        repo.git.config('gitweb.owner', owner)

    if reference is None:
        return

    # XXX: Removing alternates involves git repack, so we don't support it
    #      at this point. We also cowardly refuse to change an existing
    #      alternates entry, as this has high chance of resulting in
    #      broken git repositories. Only do this when we're going from
    #      none to some value.
    if len(repo.alternates) > 0:
        return

    alt_objects = os.path.join(toplevel, reference.lstrip('/'), 'objects')
    alt_file = os.path.join(repo_path, 'objects', 'info', 'alternates')
    logger.info('Setting %s alternates to: %s', gitdir, alt_objects)
    with open(alt_file, 'wt') as fh:
        fh.write('%s\n' % alt_objects)
def set_agefile(toplevel, gitdir, last_modified):
    """Record ``last_modified`` for a repo and write its cgit agefile.

    :param toplevel: top-level directory holding the repositories
    :param gitdir: repository path relative to ``toplevel``
    :param last_modified: modification time in seconds since the epoch
    """
    grokmirror.set_repo_timestamp(toplevel, gitdir, last_modified)

    # set agefile, which can be used by cgit to show idle times
    # cgit recommends it to be yyyy-mm-dd hh:mm:ss
    # Spelled out instead of '%F %T': those are C library extensions that
    # are not available on every platform.
    cgit_fmt = time.strftime('%Y-%m-%d %H:%M:%S',
                             time.localtime(last_modified))
    agefile = os.path.join(toplevel, gitdir.lstrip('/'),
                           'info/web/last-modified')
    agedir = os.path.dirname(agefile)
    if not os.path.exists(agedir):
        try:
            os.makedirs(agedir)
        except OSError:
            # Someone else may have created it between the check and the
            # call; only a still-missing directory is an error.
            if not os.path.isdir(agedir):
                raise
    with open(agefile, 'wt') as fh:
        fh.write('%s\n' % cgit_fmt)
    logger.debug('Wrote "%s" into %s', cgit_fmt, agefile)
def run_post_update_hook(hookscript, toplevel, gitdir, threadid='X'):
    """Run the post-update hook for a repository and log its output.

    The hook is invoked with the full repository path as its only argument.
    Its stderr is logged as a warning and its stdout at info level. Nothing
    happens when ``hookscript`` is empty; a non-executable hook is reported
    and skipped.

    :param hookscript: path to the hook executable, or '' for none
    :param toplevel: top-level directory holding the repositories
    :param gitdir: repository path relative to ``toplevel``
    :param threadid: identifier used in log messages
    """
    if hookscript == '':
        return
    if not os.access(hookscript, os.X_OK):
        logger.warning('[Thread-%s] post_update_hook %s is not executable',
                       threadid, hookscript)
        return

    fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
    args = [hookscript, fullpath]
    logger.debug('[Thread-%s] Running: %s', threadid, ' '.join(args))
    (output, error) = subprocess.Popen(args, stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE).communicate()

    # Hook output is arbitrary bytes: never let undecodable data raise
    # and take the calling thread down with it.
    error = error.decode('utf-8', 'replace').strip()
    output = output.decode('utf-8', 'replace').strip()
    if error:
        # Put hook stderror into warning
        logger.warning('[Thread-%s] Hook Stderr: %s', threadid, error)
    if output:
        # Put hook stdout into info
        logger.info('[Thread-%s] Hook Stdout: %s', threadid, output)
def pull_repo(toplevel, gitdir, threadid='X'):
    """Run ``git remote update --prune`` in a repo; return True on exit 0.

    Stderr lines that look like normal fetch chatter are logged at debug
    level, everything else as a warning.
    """
    repo_path = os.path.join(toplevel, gitdir.lstrip('/'))
    retcode, output, error = grokmirror.run_git_command(
        repo_path, ['remote', 'update', '--prune'])

    if error:
        # Put things we recognize into debug
        expected = []
        unexpected = []
        for line in error.split('\n'):
            recognized = line.startswith('From ') or line.find('-> ') > 0
            if recognized:
                expected.append(line)
            else:
                unexpected.append(line)
        if expected:
            logger.debug('[Thread-%s] Stderr: %s',
                         threadid, '\n'.join(expected))
        if unexpected:
            logger.warning('[Thread-%s] Stderr: %s',
                           threadid, '\n'.join(unexpected))

    return retcode == 0
def clone_repo(toplevel, gitdir, site, reference=None):
    """Mirror-clone a repository from ``site`` into ``toplevel``.

    :param toplevel: top-level directory holding the repositories
    :param gitdir: repository path relative to both ``site`` and ``toplevel``
    :param site: base URL/path of the upstream
    :param reference: optional repo (relative to ``toplevel``) passed to
        ``git clone --reference``
    :return: True when git exited with status 0
    """
    source = os.path.join(site, gitdir.lstrip('/'))
    dest = os.path.join(toplevel, gitdir.lstrip('/'))

    args = ['clone', '--mirror']
    if reference is not None:
        reference = os.path.join(toplevel, reference.lstrip('/'))
        args.append('--reference')
        args.append(reference)
    args.append(source)
    args.append(dest)

    logger.info('Cloning %s into %s', source, dest)
    if reference is not None:
        logger.info('With reference to %s', reference)

    retcode, output, error = grokmirror.run_git_command(None, args)
    success = retcode == 0

    if error:
        # Put things we recognize into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            # A single test so that a recognized line is filed exactly
            # once, and never also reported as a warning.
            if (line.find('cloned an empty repository') > 0
                    or line.find('into bare repository') > 0):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.warning('Stderr: %s', '\n'.join(warn))

    return success
def clone_order(to_clone, manifest, to_clone_sorted, existing):
    """Order repos so that each is cloned after the repo it references.

    Repos are appended to ``to_clone_sorted`` (modified in place) once their
    reference is None, is in ``existing``, or has already been sorted. Passes
    repeat until everything is placed or a pass makes no progress; whatever
    is still unresolved then is appended at the end.
    """
    pending = to_clone
    while True:
        deferred = []
        logger.debug('Another clone_order loop')
        for gitdir in pending:
            reference = manifest[gitdir]['reference']
            logger.debug('reference: %s', reference)
            resolvable = (reference in existing
                          or reference in to_clone_sorted
                          or reference is None)
            if resolvable:
                logger.debug('%s: reference found in existing', gitdir)
                to_clone_sorted.append(gitdir)
            else:
                logger.debug('%s: reference not found', gitdir)
                deferred.append(gitdir)

        if len(deferred) == 0 or len(deferred) == len(pending):
            # we can resolve no more dependencies, break out
            logger.debug('Finished resolving dependencies, quitting')
            if len(deferred):
                logger.debug('Unresolved: %s', deferred)
                to_clone_sorted.extend(deferred)
            return

        logger.debug('Going for another clone_order loop')
        pending = deferred
def write_projects_list(manifest, config):
    """Write the projects list file named by ``config['projectslist']``.

    One repository path per line, with ``projectslist_trimtop`` removed from
    the start and leading slashes stripped. When ``projectslist_symlinks`` is
    'yes', each repo's symlinks are listed the same way. The file is written
    to a temporary file in the destination directory and then moved into
    place. Does nothing when ``projectslist`` is unset or empty.

    :param manifest: mapping of gitdir -> repo info dict
    :param config: section configuration mapping
    """
    import tempfile
    import shutil

    if 'projectslist' not in config:
        return
    plpath = config['projectslist']
    if plpath == '':
        return

    trimtop = config.get('projectslist_trimtop', '')
    add_symlinks = config.get('projectslist_symlinks') == 'yes'

    def _listed_path(path):
        # Drop the configured prefix, then the leading slash
        if trimtop and path.startswith(trimtop):
            path = path[len(trimtop):]
        # Always remove leading slash, otherwise cgit breaks
        return path.lstrip('/')

    (dirname, basename) = os.path.split(plpath)
    (fd, tmpfile) = tempfile.mkstemp(prefix=basename, dir=dirname)
    logger.info('Writing new %s', plpath)

    try:
        # Wrap the descriptor mkstemp already opened instead of opening
        # the file a second time, so the descriptor is not leaked.
        with os.fdopen(fd, 'wt') as fh:
            for gitdir in manifest:
                fh.write('%s\n' % _listed_path(gitdir))
                if add_symlinks and 'symlinks' in manifest[gitdir]:
                    # Do the same for symlinks
                    # XXX: Should make this configurable, perhaps
                    for symlink in manifest[gitdir]['symlinks']:
                        fh.write('%s\n' % _listed_path(symlink))

        # set mode to current umask; the only way to read the umask is to
        # set it, so restore it right away
        curmask = os.umask(0)
        os.umask(curmask)
        # Clear the masked bits. XOR would *set* any umask bit outside
        # 0666, e.g. umask 077 would yield 0611 rather than 0600.
        os.chmod(tmpfile, 0o666 & ~curmask)
        shutil.move(tmpfile, plpath)
    finally:
        # If something failed, don't leave tempfiles trailing around
        if os.path.exists(tmpfile):
            os.unlink(tmpfile)
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    """Bring the local mirror described by ``config`` in line with upstream.

    Fetches the upstream manifest, compares it with the local one, then
    pulls changed repos, clones new ones, maintains symlinks, optionally
    purges repos no longer listed, and finally saves the local manifest and
    the projects list.

    :param name: section name; also used as the logger name
    :param config: section configuration mapping
    :param verbose: log at info level to the console
    :param force: ignore manifest mtime and force fingerprint recalculation
    :param nomtime: do not use the manifest mtime to decide whether to run
    :param verify: only verify fingerprints against upstream, change nothing
    :param verify_subpath: glob limiting which repos are verified
    :param noreuse: skip repos found on disk but absent from our manifest
    :param purge: remove repos that are no longer in the manifest
    :param pretty: pretty-print the saved manifest
    :param forcepurge: purge even when the purgeprotect limit is exceeded
    :return: 1 on error (or failed verification / refused purge), 0 when
        there was nothing to do or the run stopped early, 127 after the
        local manifest has been written
    """
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            # Split on the LAST '@' and the FIRST ':' so that passwords
            # containing either character do not break the unpacking.
            (upass, netloc) = chunks.netloc.rsplit('@', 1)
            if upass.find(':') > 0:
                (username, password) = upass.split(':', 1)
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        lm_header = ufh.headers.get('Last-Modified')
        try:
            last_modified = calendar.timegm(
                time.strptime(lm_header, '%a, %d %b %Y %H:%M:%S %Z'))
        except (TypeError, ValueError):
            # Header missing (None) or not in the expected format: treat
            # the manifest as modified right now rather than crashing.
            logger.debug('No usable Last-Modified header (%s), using '
                         'current time', lm_header)
            last_modified = int(time.time())

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(
                toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info(' %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):
        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            # Daemon threads so the never-ending workers do not keep the
            # process alive (attribute form; setDaemon() is deprecated)
            t.daemon = True
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                # A repo reused from disk is not in our manifest yet;
                # keep the upstream entry with the local fingerprint then.
                if gitdir in mymanifest:
                    culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                e_clone.update()
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir].get('reference')

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                # Explicit check rather than assert, which is stripped
                # when Python runs with -O
                if not 1 <= purge_limit <= 99:
                    raise ValueError('purgeprotect out of range')
            except ValueError:
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                # Do not leave the manifest locked behind us
                grokmirror.manifest_unlock(manifile)
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging :', unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    # Work out the relative name up front so that every
                    # message below refers to the repo being handled
                    gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127
def parse_args():
    """Parse the command line; return ``(opts, args)`` from optparse.

    Exits through ``op.error`` when no config file was given.
    """
    from optparse import OptionParser

    usage = ('usage: %prog -c repos.conf\n'
             'Create a grok mirror using the repository configuration '
             'found in repos.conf\n')
    op = OptionParser(usage=usage, version=grokmirror.VERSION)

    # All on/off switches: (short flag, long flag, dest, help text)
    switches = (
        ('-v', '--verbose', 'verbose',
         'Be verbose and tell us what you are doing'),
        ('-n', '--no-mtime-check', 'nomtime',
         'Run without checking manifest mtime.'),
        ('-f', '--force', 'force',
         'Force full git update regardless of last-modified time.'
         ' Also useful when repos.conf has changed.'),
        ('-p', '--purge', 'purge',
         'Remove any git trees that are no longer in manifest.'),
        ('', '--force-purge', 'forcepurge',
         'Force purge despite significant repo deletions.'),
        ('-y', '--pretty', 'pretty',
         'Pretty-print manifest (sort keys and add indentation)'),
        ('-r', '--no-reuse-existing-repos', 'noreuse',
         'If any existing repositories are found on disk, do NOT '
         'update origin and reuse'),
        ('-m', '--verify-mirror', 'verify',
         'Do not perform any updates, just verify that mirror '
         'matches upstream manifest.'),
    )
    for short_flag, long_flag, dest, helptext in switches:
        op.add_option(short_flag, long_flag, dest=dest,
                      action='store_true', default=False, help=helptext)

    # Options that take a value
    op.add_option('-s', '--verify-subpath', dest='verify_subpath',
                  default='*',
                  help='Only verify a subpath (accepts shell globbing)')
    op.add_option('-c', '--config', dest='config',
                  help='Location of repos.conf')

    opts, args = op.parse_args()
    if not opts.config:
        op.error('You must provide the path to the config file')
    return opts, args
def grok_pull(config, verbose=False, force=False, nomtime=False,
              verify=False, verify_subpath='*', noreuse=False,
              purge=False, pretty=False, forcepurge=False):
    """Run pull_mirror() for every section of the config file.

    ``config`` is the path to the ini file. Each section's options are laid
    over a set of defaults and handed to pull_mirror() together with the
    remaining arguments.

    Returns 1 if any section returned 1, otherwise 127 if any section
    returned 127, otherwise 0.
    """
    global lock_fails
    global git_fails

    try:
        from configparser import ConfigParser
    except ImportError:
        from ConfigParser import ConfigParser

    parser = ConfigParser()
    parser.read(config)

    defaults = {
        'default_owner': 'Grokmirror User',
        'post_update_hook': '',
        'include': '*',
        'exclude': '',
        'ignore_repo_references': 'no',
        'purgeprotect': '5',
    }

    overall = 0
    for section in parser.sections():
        # Reset fail trackers for each section
        lock_fails = []
        git_fails = []

        section_config = dict(defaults)
        section_config.update(parser.items(section))

        result = pull_mirror(
            section, section_config, verbose, force, nomtime, verify,
            verify_subpath, noreuse, purge, pretty, forcepurge)

        if result == 1:
            # Fatal error encountered at some point
            overall = 1
        elif result == 127 and overall != 1:
            # Successful run with contents modified
            overall = 127

    return overall
def command():
    """Command-line entry point: parse options, run grok_pull, exit.

    The process exit status is the value returned by grok_pull().
    """
    opts, args = parse_args()
    sys.exit(grok_pull(
        opts.config,
        verbose=opts.verbose,
        force=opts.force,
        nomtime=opts.nomtime,
        verify=opts.verify,
        verify_subpath=opts.verify_subpath,
        noreuse=opts.noreuse,
        purge=opts.purge,
        pretty=opts.pretty,
        forcepurge=opts.forcepurge,
    ))


if __name__ == '__main__':
    command()