From f71a5c6e72f1503f42141a00e6bfdfe3daff2e95 Mon Sep 17 00:00:00 2001 From: German Service Network Date: Sat, 13 Jun 2020 12:37:43 +0200 Subject: [PATCH] Remove unused codeline, Add logging, Snapshot url https --- raspbmirror.py | 183 +++++++++++++++++-------------------------------- 1 file changed, 62 insertions(+), 121 deletions(-) diff --git a/raspbmirror.py b/raspbmirror.py index 7012105..29cff78 100644 --- a/raspbmirror.py +++ b/raspbmirror.py @@ -9,8 +9,6 @@ import hashlib import gzip import urllib.request import stat -#from sortedcontainers import SortedDict -#from sortedcontainers import SortedList from collections import deque from collections import OrderedDict from datetime import datetime @@ -19,11 +17,12 @@ import argparse import re from heapq import heappush, heappop import fcntl +import logging parser = argparse.ArgumentParser(description="mirror raspbian repo.") parser.add_argument("baseurl", help="base url for source repo (e.g. https://archive.raspbian.org/ )",nargs='?') parser.add_argument("mdurl", help="base url for mirrordirector or local source mirror (e.g. https://mirrordirector.raspbian.org/ )",nargs='?') -parser.add_argument("hpurl", help="base url for last result hash pool (e.g. http://snapshot.raspbian.org/hashpool )",nargs='?') +parser.add_argument("hpurl", help="base url for last result hash pool (e.g. https://snapshot.raspbian.org/hashpool )",nargs='?') parser.add_argument("--internal", help=argparse.SUPPRESS) #base URL for private repo (internal use only) parser.add_argument("--sourcepool", help="specify a source pool to look for packages in before downloading them (useful if maintaining multiple mirrors)",action='append') @@ -47,13 +46,16 @@ args = parser.parse_args() lockfd = os.open('.',os.O_RDONLY) fcntl.flock(lockfd,fcntl.LOCK_EX | fcntl.LOCK_NB) +logpath = os.path.dirname(os.path.realpath(__file__)) +logging.basicConfig(filename=logpath+'/raspbmirror.log',format='%(asctime)s %(levelname)s: %(message)s', level=logging.DEBUG) + def addfilefromdebarchive(filestoverify,filequeue,filename,sha256,size): size = int(size) sha256andsize = [sha256,size,'M'] if filename in filestoverify: if (sha256andsize[0:2] != filestoverify[filename][0:2]): if stage == 'scanexisting': - print('warning: same file with different hash/size during scanexisting phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize)) + logging.warning('warning: same file with different hash/size during scanexisting phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize)) #find existing sha1/size of file on disk if it exists if os.path.isfile(filename): f = open(filename,'rb') @@ -68,7 +70,7 @@ def addfilefromdebarchive(filestoverify,filequeue,filename,sha256,size): size = None filestoverify[filename] = [sha256,size,'M'] else: - print('error: same file with different hash/size during downloadnew phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize)) + logging.error('error: same file with different hash/size during downloadnew phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize)) sys.exit(1) else: filestoverify[filename] = sha256andsize @@ -92,16 +94,16 @@ shaallowed = re.compile(b'[a-z0-9]+',re.ASCII) def ensuresafepath(path): pathsplit = path.split(b'/') if path[0] == '/': - print("path must be relative") + logging.info("path must be relative") sys.exit(1) for component in pathsplit: if not pfnallowed.fullmatch(component): - print("file name contains unexpected characters") + logging.info("file name contains unexpected characters") sys.exit(1) elif component[0] == '.': - print("filenames starting with a dot are not allowed") + logging.info("filenames starting with a dot are not allowed") sys.exit(1) - + def geturl(fileurl): with urllib.request.urlopen(fileurl.decode('ascii')) as response: data = response.read() @@ -129,55 +131,18 @@ def makenewpath(path): def getfile(path,sha256,size): ensuresafepath(path) if not shaallowed.fullmatch(sha256): - print('invalid character in sha256 hash') + logging.info('invalid character in sha256 hash') sys.exit(1) - #hashfn = b'../hashpool/' + sha256[:2] +b'/'+ sha256[:4] +b'/'+ sha256 - #if os.path.isfile(hashfn): - # if os.path.getsize(hashfn) != size: - # print('size mismatch on existing file in hash pool') - # sys.exit(1) - #else: - # secondhashfn = None - # if args.secondpool is not None: - # secondhashfn = os.path.join(args.secondpool.encode('ascii'),sha256[:2] +b'/'+ sha256[:4] +b'/'+ sha256) - # #print(secondhashfn) - # if not os.path.isfile(secondhashfn): - # secondhashfn = None - # if secondhashfn is None: - # else: - # print('copying '+path.decode('ascii')+' with hash '+sha256.decode('ascii')+' from secondary pool') - # f = open(secondhashfn,'rb') - # data = f.read() - # f.close() - # ts = os.path.getmtime(secondhashfn) - # sha256hash = hashlib.sha256(data) - # sha256hashed = sha256hash.hexdigest().encode('ascii') - # if (sha256 != sha256hashed): - # #print(repr(filesize)) - # #print(repr(sha256)) - # #print(repr(sha256hashed)) - # print('hash mismatch while downloading file '+path.decode('ascii')+' '+sha256.decode('ascii')+' '+sha256hashed.decode('ascii')); - # sys.exit(1) - # if len(data) != size: - # print('size mismatch while downloading file') - # sys.exit(1) - # hashdir = os.path.dirname(hashfn) - # os.makedirs(hashdir,exist_ok=True) - # f = open(hashfn,'wb') - # f.write(data) - # f.close() - # - # os.utime(hashfn,(ts,ts)) if len(os.path.dirname(path)) > 0: os.makedirs(os.path.dirname(path),exist_ok=True) if os.path.isfile(makenewpath(path)): # "new" file already exists, lets check the hash fn = makenewpath(path) sha256hashed, tl = getfilesha256andsize(fn) if (sha256 == sha256hashed) and (size == tl): - print('existing file '+path.decode('ascii')+' matched by hash and size') + logging.info('existing file '+path.decode('ascii')+' matched by hash and size') fileupdates.add(path) return # no download needed but rename is - elif path in oldknownfiles: + elif path in oldknownfiles: #shortcut exit if file is unchanged, we skip this if a "new" file was detected because #that means some sort of update was going on to the file and may need to be finished/cleaned up. oldsha256,oldsize,oldstatus = oldknownfiles[path] @@ -187,7 +152,7 @@ def getfile(path,sha256,size): if (size == os.path.getsize(path)): #no point reading the data and calculating a hash if the size does not match sha256hashed, tl = getfilesha256andsize(path) if (sha256 == sha256hashed) and (size == tl): - print('existing file '+path.decode('ascii')+' matched by hash and size') + logging.info('existing file '+path.decode('ascii')+' matched by hash and size') if os.path.isfile(makenewpath(path)): #if file is up to date but a "new" file exists and is bad #(we wouldn't have got this far if it was good) @@ -203,7 +168,7 @@ def getfile(path,sha256,size): outputpath = path pathsplit = path.split(b'/') if (pathsplit[1:2] == [b'pool']) and (args.debugskippool): - print('skipping download of '+path.decode('ascii')+' because --debugskippool was specified') + logging.info('skipping download of '+path.decode('ascii')+' because --debugskippool was specified') return if (args.internal is not None) and (pathsplit[0] == b'raspbian'): fileurl = args.internal.encode('ascii') +b'/private/' + b'/'.join(pathsplit[1:]) @@ -212,13 +177,11 @@ def getfile(path,sha256,size): data = None if args.sourcepool is not None: for sourcepool in args.sourcepool: - #print(repr(args.sourcepool)) - #print(repr(sourcepool)) sourcepool = sourcepool.encode('ascii') if pathsplit[1] == b'pool': spp = os.path.join(sourcepool,b'/'.join(pathsplit[2:])) if os.path.isfile(spp) and (size == os.path.getsize(spp)): - print('trying file from sourcepool '+spp.decode('ascii')) + logging.info('trying file from sourcepool '+spp.decode('ascii')) ts = os.path.getmtime(spp) f = open(spp,'rb') data = f.read() @@ -226,18 +189,15 @@ def getfile(path,sha256,size): sha256hash = hashlib.sha256(data) sha256hashed = sha256hash.hexdigest().encode('ascii') if (sha256 != sha256hashed): - #print(repr(filesize)) - #print(repr(sha256)) - #print(repr(sha256hashed)) - print('hash mismatch while trying file from sourcepool, ignoring file'); + logging.info('hash mismatch while trying file from sourcepool, ignoring file'); data = None continue try: os.link(spp,outputpath) - print('successfully hardlinked file to source pool') - + logging.info('successfully hardlinked file to source pool') + except: - print('file in souce pool was good but hard linking failed, copying file instead') + logging.info('file in souce pool was good but hard linking failed, copying file instead') fdownloads.write(outputpath+b'\n') fdownloads.flush() return @@ -247,7 +207,7 @@ def getfile(path,sha256,size): gzfile = makenewpath(path+b'.gz') else: gzfile = path+b'.gz' - print('uncompressing '+gzfile.decode('ascii')+' with hash '+sha256.decode('ascii')+' to '+outputpath.decode('ascii')) + logging.info('uncompressing '+gzfile.decode('ascii')+' with hash '+sha256.decode('ascii')+' to '+outputpath.decode('ascii')) f = gzip.open(gzfile) data = f.read() f.close() @@ -255,14 +215,13 @@ def getfile(path,sha256,size): if not checkdatahash(data, sha256, 'hash mismatch while uncompressing file ', path, ''): sys.exit(1) if len(data) != size: - print('size mismatch while uncompressing file') + logging.info('size mismatch while uncompressing file') sys.exit(1) #use slicing so we don't error if pathsplit only has one item if (data is None) and (mdurl is not None) and (pathsplit[1:2] == [b'pool']): fileurl = mdurl + b'/' + path - #fileurl = mdurl + b'/' + b'/'.join(pathsplit[1:]) data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, ' from mirrordirector',' trying main server instead') if data is None: @@ -275,7 +234,7 @@ def getfile(path,sha256,size): data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, '','') if data is None: if (stage == 'downloadnew') and (b'dists' not in pathsplit): - print('continuing dispite download failure of '+path.decode('ascii')+', may revisit later') + logging.info('continuing dispite download failure of '+path.decode('ascii')+', may revisit later') global dlerrorcount dlerrorcount += 1 knownfiles[path][2] = 'F' @@ -286,7 +245,7 @@ def getfile(path,sha256,size): fileurl = hpurl + b'/' + sha256[0:2] + b'/' + sha256[0:4] + b'/' + sha256 data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, '', '') if data is None: - print('failed to get '+path.decode('ascii')+' aborting') + logging.info('failed to get '+path.decode('ascii')+' aborting') sys.exit(1) if data is not ...: #... is used to indicate that the file has been downloaded directly to disk and we don't # need to write it out here. @@ -326,7 +285,7 @@ def getandcheckfile(fileurl, sha256, size, path, outputpath, errorfromstr, error else: writepath = outputpath viamsg = '' - print( + logging.info( 'downloading ' + fileurl.decode('ascii') + ' with hash ' + sha256.decode( 'ascii') + ' to ' + outputpath.decode( 'ascii') + viamsg) @@ -348,10 +307,10 @@ def getandcheckfile(fileurl, sha256, size, path, outputpath, errorfromstr, error errorsuffix): data = None elif tl != size: - print('size mismatch while downloading file' + errorfromstr + '.' + errorsuffix) + logging.info('size mismatch while downloading file' + errorfromstr + '.' + errorsuffix) data = None except Exception as e: - print('exception ' + str(e) + ' while downloading file' + errorfromstr + '.' + errorsuffix) + logging.info('exception ' + str(e) + ' while downloading file' + errorfromstr + '.' + errorsuffix) if f is not None: f.close() data = None @@ -376,10 +335,7 @@ def checkdatahash(data, sha256, errorprefix, path, errorsuffix): def testandreporthash(sha256hash, sha256, errorprefix, path, errorsuffix): sha256hashed = sha256hash.hexdigest().encode('ascii') if (sha256 != sha256hashed): - # print(repr(filesize)) - # print(repr(sha256)) - # print(repr(sha256hashed)) - print(errorprefix + path.decode('ascii') + ' ' + sha256.decode('ascii') + ' ' + sha256hashed.decode( + logging.info(errorprefix + path.decode('ascii') + ' ' + sha256.decode('ascii') + ' ' + sha256hashed.decode( 'ascii') + errorsuffix); return False return True @@ -398,32 +354,26 @@ else: if args.baseurl is None: baseurl = b'https://archive.raspbian.org' mdurl = b'http://mirrordirector.raspbian.org' - hpurl = b'http://snapshot.raspbian.org/hashpool' + hpurl = b'https://snapshot.raspbian.org/hashpool' else: baseurl = args.baseurl.encode('ascii') - - - symlinkupdates = list() fileupdates = set() def opengu(filepath): - #print('in opengu') - #print('filepath = '+repr(filepath)) - #print('fileupdates = '+repr(fileupdates)) f = None if (filepath in fileupdates): - print((b'opening '+makenewpath(filepath)+b' for '+filepath).decode('ascii')) + logging.info((b'opening '+makenewpath(filepath)+b' for '+filepath).decode('ascii')) f = open(makenewpath(filepath),'rb') elif (filepath+b'.gz' in fileupdates): - print((b'opening '+makenewpath(filepath+b'.gz')+b' for '+filepath).decode('ascii')) + logging.info((b'opening '+makenewpath(filepath+b'.gz')+b' for '+filepath).decode('ascii')) f = gzip.open(makenewpath(filepath+b'.gz'),'rb') elif os.path.exists(filepath): - print((b'opening '+filepath+b' for '+filepath).decode('ascii')) + logging.info((b'opening '+filepath+b' for '+filepath).decode('ascii')) f = open(filepath,'rb') elif os.path.exists(filepath+b'.gz'): - print((b'opening '+filepath+b'.gz for '+filepath).decode('ascii')) + logging.info((b'opening '+filepath+b'.gz for '+filepath).decode('ascii')) f = gzip.open(filepath+b'.gz','rb') return f @@ -437,22 +387,22 @@ dlerrorcount = 0; for stage in ("scanexisting","downloadnew","finalize"): if stage == "finalize": if dlerrorcount == 0: - print('skipping stage 3 as there were no download failures in stage 2') + logging.info('skipping stage 3 as there were no download failures in stage 2') #we can finish now. break - print('stage 3, download final updates') - + logging.info('stage 3, download final updates') + oldknownfiles = knownfiles oldsymlinks |= newsymlinks newsymlinks = set() if stage == "downloadnew": - print('stage 2, main download') + logging.info('stage 2, main download') oldknownfiles = knownfiles basefiles = set(oldknownfiles.keys()) if stage == "scanexisting": - print('stage 1, scan existing') + logging.info('stage 1, scan existing') else: if args.internal is not None: fileurl = args.internal.encode('ascii') + b'/snapshotindex.txt' @@ -461,7 +411,7 @@ for stage in ("scanexisting","downloadnew","finalize"): if (stage == "downloadnew") and (args.debugfif is not None): fileurl = args.debugfif.encode('ascii') - (filedata,ts) = geturl(fileurl) + (filedata,ts) = geturl(fileurl) f = open(makenewpath(b'snapshotindex.txt'),'wb') if (args.tlwhitelist is None) and (args.distswhitelist is None): @@ -486,8 +436,6 @@ for stage in ("scanexisting","downloadnew","finalize"): for line in lines: path, sizeandsha = line.split(b' ') pathsplit = path.split(b'/') - #print(pathsplit) - #print(len(pathsplit)) if (len(pathsplit) > 2) and (pathsplit[1] == b'dists'): if sizeandsha[0:2] == b'->': #symlink target = sizeandsha[2:] @@ -502,15 +450,15 @@ for stage in ("scanexisting","downloadnew","finalize"): pass else: linesnew.append(line) - + lines = linesnew if founddists == set(): - print('none of the whitelisted distributions were found in the index file') + logging.info('none of the whitelisted distributions were found in the index file') sys.exit(1) missingesdists = founddists - foundesdists if missingesdists != set(): for toplevel,distribution in missingesdists: - print((b'missing extra sources file for '+toplevel+b'/dists/'+distribution).decode('ascii')) + logging.info((b'missing extra sources file for '+toplevel+b'/dists/'+distribution).decode('ascii')) sys.exit(1) for line in lines: f.write(line+b'\n') @@ -543,7 +491,7 @@ for stage in ("scanexisting","downloadnew","finalize"): if os.readlink(filepath) != symlinktarget: symlinkupdates.append((filepath,symlinktarget)) else: - print('creating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii')) + logging.info('creating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii')) os.symlink(symlinktarget,filepath) newsymlinks.add(filepath) else: @@ -557,23 +505,19 @@ for stage in ("scanexisting","downloadnew","finalize"): extrasources = {} while filequeue: (priority, filepath) = heappop(filequeue) - #print('processing '+filepath.decode('ascii')) sha256,size,status = knownfiles[filepath] if (stage != "scanexisting") and ((filepath+b'.gz' not in knownfiles) or (status == 'R') or os.path.exists(filepath)): getfile(filepath,sha256,size) pathsplit = filepath.split(b'/') - #print(pathsplit[-1]) - #if (pathsplit[-1] == b'Packages'): - # print(repr(pathsplit)) if (pathsplit[-1] == b'Release') and (pathsplit[-3] == b'dists'): distdir = b'/'.join(pathsplit[:-1]) f = opengu(filepath) if f is None: if stage == 'scanexisting': - print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') + logging.warning('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') continue else: - print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') + logging.error('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') sys.exit(1) insha256 = False; for line in f: @@ -595,20 +539,20 @@ for stage in ("scanexisting","downloadnew","finalize"): toplevel = b'/'.join(pathsplit[:-5]) else: toplevel = b'/'.join(pathsplit[:-6]) - print('found packages file: '+filepath.decode('ascii')) + logging.info('found packages file: '+filepath.decode('ascii')) pf = opengu(filepath) if pf is None: if stage == 'scanexisting': - print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') + logging.warning('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') continue else: - print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') + logging.error('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') sys.exit(1) filename = None size = None sha256 = None - + for line in pf: linesplit = line.split() if (len(linesplit) == 0): @@ -625,15 +569,15 @@ for stage in ("scanexisting","downloadnew","finalize"): sha256 = linesplit[1] pf.close() elif (pathsplit[-1] == b'Sources') and (pathsplit[-5] == b'dists'): - print('found sources file: '+filepath.decode('ascii')) + logging.info('found sources file: '+filepath.decode('ascii')) toplevel = b'/'.join(pathsplit[:-5]) pf = opengu(filepath) if pf is None: if stage == 'scanexisting': - print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') + logging.warning('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') continue else: - print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') + logging.error('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') sys.exit(1) filesfound = []; directory = None @@ -658,14 +602,14 @@ for stage in ("scanexisting","downloadnew","finalize"): insha256p = False pf.close() elif (args.distswhitelist is not None) and (pathsplit[-1] == b'extrasources') and (pathsplit[-3] == b'dists'): - print('found extrasources file: '+filepath.decode('ascii')) + logging.info('found extrasources file: '+filepath.decode('ascii')) esf = opengu(filepath) if esf is None: if stage == 'scanexisting': - print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') + logging.warning('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state') continue else: - print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') + logging.error('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting') sys.exit(1) for line in esf: line = line.strip() @@ -673,7 +617,6 @@ for stage in ("scanexisting","downloadnew","finalize"): size , sha256 = shaandsize.split(b':') addfilefromdebarchive(knownfiles,filequeue,filename,sha256,size) extrasources[filename] = shaandsize - #print(line) fdownloads.close() fdownloads = open(makenewpath(b'raspbmirrordownloads.txt'),"rb") @@ -689,7 +632,6 @@ if args.cleanup: for (dirpath, dirnames, filenames) in towalk: for filename in (filenames + dirnames): # os.walk seems to regard symlinks to directories as directories. filepath = os.path.join(dirpath, filename)[2:].encode('ascii') # [2:] is to strip the ./ prefix - # print(filepath) if os.path.islink(filepath): oldsymlinks.add(filepath) for filename in filenames: @@ -697,14 +639,14 @@ if args.cleanup: if not os.path.islink(filepath) and not filepath.startswith(b'snapshotindex.txt') and not filepath.startswith(b'raspbmirrordownloads.txt'): basefiles.add(filepath) -print('stage 4, moves and deletions') +logging.info('stage 4, moves and deletions') for filepath in fileupdates: - print((b'renaming '+makenewpath(filepath)+b' to '+filepath).decode('ascii')) + logging.info((b'renaming '+makenewpath(filepath)+b' to '+filepath).decode('ascii')) os.replace(makenewpath(filepath),filepath) for (filepath,symlinktarget) in symlinkupdates: - print('updating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii')) + logging.info('updating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii')) os.remove(filepath) os.symlink(symlinktarget,filepath) @@ -721,14 +663,14 @@ for filepath in removedfiles: #file may not actually exist, either due to earlier updates gone-wrong #or due to the file being a non-realised uncompressed version of #a gzipped file. - if os.path.exists(filepath): + if os.path.exists(filepath): ensuresafepath(filepath) - print('removing '+filepath.decode('ascii')) + logging.info('removing '+filepath.decode('ascii')) os.remove(filepath) #clean up empty directories. dirpath = os.path.dirname(filepath) while (len(dirpath) != 0) and isemptydir(dirpath): - print('removing empty dir '+dirpath.decode('ascii')) + logging.info('removing empty dir '+dirpath.decode('ascii')) os.rmdir(dirpath) dirpath = os.path.dirname(dirpath) @@ -739,4 +681,3 @@ f.close() os.rename(makenewpath(b'snapshotindex.txt'),b'snapshotindex.txt') os.remove(makenewpath(b'raspbmirrordownloads.txt')) - -- 2.43.0