# sim-sync -- sync source trees across machines using information from the machine database
#
# Michael Thomas
# Center for Computation & Technology
# Louisiana State University
#

import sys
import os
import re

optionGroups = ['sim-sync']
Purpose = "sync cactus sourcetree to a remote machine"

############################################

import simenv
import simsubs
import simlib

from libutil import *

known_commands = []
usage_strings = {'sync': 'sync cactus sourcetree to a remote machine'}

# Module state: assigned once by main() and only read by CompileCommand().
local_suffix = None
local_sourcebasedir = None
localMachineName = None
localMachineEntry = None

# Matches the version banner printed by `rsync --version`.  Whitespace is
# matched loosely and a leading "v" on the version number is tolerated.
_RSYNC_VERSION_RE = re.compile(r'rsync\s+version\s+v?(\d+)\.(\d+)\.(\d+)')

############################### FUNCTIONS ###############################


def GetRsyncInfo(machineEntry):
    """Return [rsynccmd, rsyncopts] for machineEntry with defines substituted."""
    return GetSubbedInfo(machineEntry, ['rsynccmd', 'rsyncopts'])


def GetSSHInfo(machineEntry):
    """Return [sshcmd, sshopts] for machineEntry with defines substituted."""
    return GetSubbedInfo(machineEntry, ['sshcmd', 'sshopts'])


def GetSubbedInfo(machineEntry, keys):
    """Look up each of keys on machineEntry and run it through SubAll.

    The keys are first checked with simlib.VerifyKeys.  The result is a list
    holding one substituted value per key, in the order the keys were given.
    """
    defines = simsubs.DefineDatabase()
    simlib.VerifyKeys(machineEntry, keys)
    return [defines.SubAll(machineEntry.GetKey(key)) for key in keys]


def RsyncVersion(rsyncInfo):
    """Return the local rsync version as a tuple (major, minor, revision) of ints.

    rsyncInfo is the (rsynccmd, rsyncopts) pair produced by GetRsyncInfo.
    The version is parsed from the output of `<rsynccmd> --version`; if no
    version number can be found, fatal() is called with an explanation.
    """
    (rsynccmd, rsyncopts) = rsyncInfo
    # NOTE(review): the second argument presumably asks ExecuteCommand to
    # return the command's output instead of its status -- confirm in simlib.
    versionText = simlib.ExecuteCommand('%s --version' % rsynccmd, True)

    match = _RSYNC_VERSION_RE.search(versionText)
    if match is None:
        fatal("could not determine rsync version from the output of '%s --version'" % rsynccmd)

    return tuple(int(part) for part in match.groups())


def CompileCommand(machineName, rsyncInfo, paths, oldRsync=False):
    """Build the shell command line that syncs paths to machineName.

    machineName -- name of the target machine in the machine database; if
                   it has a different IO machine, that machine is used.
    rsyncInfo   -- local (rsynccmd, rsyncopts) pair from GetRsyncInfo.
    paths       -- paths to sync.  A single path may contain directories;
                   several paths must all be top-level names.
    oldRsync    -- force the filter rules meant for rsync older than 3.0.0.

    The returned string changes into the local source directory, runs the
    machine's localsshsetup, creates the remote target directory over ssh
    and finally runs rsync.

    This function reads the module state set up by main() but does not
    modify it, so it can be called repeatedly (retry, several machines).
    """
    defines = simsubs.DefineDatabase()

    (rsynccmd, rsyncopts) = rsyncInfo
    rsyncversion = RsyncVersion(rsyncInfo)

    if machineName == localMachineName:
        fatal("cannot sync to local machine")

    machineEntry = simenv.ConfigurationDatabase.GetMachine(machineName)

    # get our IO machine -- if iomachine doesn't exist, it returns itself.
    ioMachine = simlib.GetIOMachine(machineName)
    if ioMachine != machineName:
        machineEntry = simenv.ConfigurationDatabase.GetMachine(ioMachine)
        machineName = ioMachine

    # make sure we have all the info we need.
    mreq_keys = ['user', 'sourcebasedir', 'hostname', 'rsynccmd', 'rsyncopts', 'sshcmd', 'sshopts']
    simlib.VerifyKeys(machineEntry, mreq_keys)

    defines.Set('USER', machineEntry.user)
    sourcebasedir = defines.SubAll(machineEntry.sourcebasedir)

    # There need to be two define databases, one for the local and
    # another for the remote system. Currently, USER is for the
    # remote and SOURCEDIR is for the local system -- this is
    # inconsistent.
    source_name = local_suffix
    local_sourcedir = simlib.BuildPath(local_sourcebasedir, source_name)
    defines.Set('SOURCEDIR', local_sourcedir)

    localsshsetup = defines.SubAll(machineEntry.localsshsetup)

    sshcmd = "%s %s" % (machineEntry.sshcmd, machineEntry.sshopts)

    trampoline = simlib.GetTrampoline(ioMachine)
    if trampoline:
        tentry = simenv.ConfigurationDatabase.GetMachine(trampoline)
        trampoline_sourcebasedir = simlib.GetSourceBaseDir(tentry)
        trampoline_sourcedir = os.path.join(trampoline_sourcebasedir, source_name)
        # Substitute with SOURCEDIR pointing at the trampoline's source
        # tree, then restore the local value before wrapping the command.
        defines.Set('SOURCEDIR', trampoline_sourcedir)
        sshcmd = defines.SubAll(sshcmd)
        defines.Set('SOURCEDIR', local_sourcedir)
        sshcmd = simlib.GetSSHCommand(trampoline, sshcmd)

    sshcmd = defines.SubAll(sshcmd)

    # If there is more than one explicit path given only accept top-level directories
    # This could be extended to do the right thing when multiple explicit paths are given
    # but would require running rsync multiple times
    #
    # The suffix used on the remote side is kept in a local variable.  It
    # must not be written back to the module-level local_suffix, otherwise a
    # later call would start from an already-extended suffix.
    remote_suffix = local_suffix
    rsyncfiles = []
    if len(paths) == 1:
        rsyncfiles = paths
        remote_suffix = os.path.join(local_suffix, os.path.dirname(paths[0]))
    else:
        for path in paths:
            if path == '':
                continue
            [head, tail] = os.path.split(path)
            if head != "":
                fatal("Only top level paths may be specified when syncing multiple paths but '%s' given." % path)
            elif not os.path.exists(tail):
                warning("Specified sync path '%s' not found." % tail)
            else:
                rsyncfiles.append(tail)

    rsyncoptions = ['--archive', '--hard-links', '--sparse', '--verbose', '--progress',
                    '--partial', '--delete', '--stats', '--compress']

    # We need to use a different rules file for pre 3.0.0 versions of rsync
    if rsyncversion[0] < 3 or oldRsync:
        info('Old rsync version in use (local version is %s.%s.%s). Upgrade to at least 3.0.0 both locally and remotely for best performance.' % rsyncversion)
        rsyncoptions.append('--filter \'merge %s/etc/filter.prersync3.rules\'' % simenv.BASE_PATH)
    else:
        rsyncoptions.append('--filter \'merge %s/etc/filter.rules\'' % simenv.BASE_PATH)

    fullpath = "%s@%s:%s%s%s" % (machineEntry.user, machineEntry.hostname, sourcebasedir, os.sep, remote_suffix)

    arguments = " ".join(simlib.GetArguments())

    cmd = "%s --rsh=%s --rsync-path=%s %s %s %s" % (
        rsynccmd,
        simlib.QuoteSafe(sshcmd),
        simlib.QuoteSafe(machineEntry.rsynccmd),
        rsyncopts,
        machineEntry.rsyncopts,
        arguments)
    cmd = "%s %s" % (cmd, " ".join(rsyncoptions))
    cmd = "%s %s" % (cmd, " ".join(rsyncfiles))
    cmd = "%s %s" % (cmd, fullpath)

    # The remote target directory has to exist before rsync can write to it.
    mkdirssh = "%s@%s" % (machineEntry.user, machineEntry.hostname)
    mkdirpath = "%s%s%s" % (sourcebasedir, os.sep, remote_suffix)
    mkdircmd = "%s %s mkdir -p %s" % (sshcmd, mkdirssh, mkdirpath)

    cmd = "cd '%s' && { %s; } && %s && %s" % (local_sourcedir, localsshsetup, mkdircmd, cmd)

    return cmd


############################### MAIN ###############################

simenv.CommandDoc['sync'] = """

Usage
-----

sim sync [options] [ *machine* ]

  Synchronise local Cactus source tree to *machine*. The first time
  this operation is performed, it can take some time as all the source
  is copied. Subsequent calls will only synchronise the files which
  have changed.

"""

simenv.CommandOptions['sync'] = "sim-sync.ini"


def main():
    """Entry point of `sim sync`: sync the chosen paths to every machine given.

    Paths come from the --sync-parfiles, --sync-sourcetree and --sync-path
    options; if none of them yields a path, the configured sync-parfiles
    and sync-sources are both used.  Every command line argument other than
    'sync' is taken as a machine name.  A closing message reports whether
    any machine failed.
    """
    ## HEADER ##
    simlib.RequireMachine()

    global local_suffix
    global local_sourcebasedir
    global localMachineName
    global localMachineEntry

    machineList = []
    pathList = []

    # Include sync-parfiles in paths if --sync-parfiles option is given
    if simenv.OptionsManager.GetOption('sync-parfiles'):
        pathList.extend(simenv.ConfigurationDatabase.GetConfigOption('sync-parfiles'))

    # Include sync-sourcetree in paths if --sync-sourcetree option is given
    if simenv.OptionsManager.GetOption('sync-sourcetree'):
        pathList.extend(simenv.ConfigurationDatabase.GetConfigOption('sync-sources'))

    if simenv.OptionsManager.HasOption('sync-path'):
        pathList.extend(simenv.OptionsManager.GetOption('sync-path'))

    # Build up the list of machines
    for arg in simenv.OptionsManager.args:
        if arg == 'sync':
            continue
        machineList.append(arg)

    # If no paths were specified, we use everything in sync-parfiles and sync-sources
    if not pathList:
        pathList.extend(simenv.ConfigurationDatabase.GetConfigOption('sync-parfiles'))
        pathList.extend(simenv.ConfigurationDatabase.GetConfigOption('sync-sources'))

    if not machineList:
        fatal("Error: no machines specified")

    info("using machines: %s" % machineList)
    info("current working directory: %s" % os.getcwd())
    info("synchronising paths: %s" % pathList)

    if simenv.CACTUS_PATH is None:
        fatal("cannot proceed with an unknown CACTUS_PATH")

    cactusDir = simenv.CACTUS_PATH
    info("Cactus Directory: %s" % cactusDir)

    # get our local_sourcebasedir
    local_sourcebasedir = simlib.GetLocalSourceBaseDir()
    local_suffix = simenv.CACTUS_PATH.replace("%s/" % local_sourcebasedir, "")

    info("CACTUS_PATH: %s" % simenv.CACTUS_PATH)
    info("local_sourcebasedir: %s" % local_sourcebasedir)
    info("local_suffix: %s" % local_suffix)

    localMachineName = simenv.LocalMachine
    localMachineEntry = simenv.LocalMachineEntry

    # returns list (rsynccmd, rsyncopts)
    rsyncInfo = GetRsyncInfo(localMachineEntry)

    errors = 0
    for m in machineList:
        cmd = CompileCommand(m, rsyncInfo, pathList)
        ret = simlib.ExecuteCommand(cmd)

        # If we get a protocol error, try again with less modern rsync options
        # NOTE(review): 512 is presumably a raw wait status for exit code 2
        # (rsync "protocol incompatibility") -- confirm against ExecuteCommand.
        if ret == 512:
            cmd = CompileCommand(m, rsyncInfo, pathList, True)
            ret = simlib.ExecuteCommand(cmd)

        if ret != 0:
            errors = errors + 1
            display("\nError while syncing to %s." % m)

    # Judge the run by the error count, not by the status of the last
    # machine, so that an early failure is not hidden by a later success.
    if errors == 0:
        info("\nSync complete.")
    else:
        display("\nErrors occurred during synchronisation.")