From 8e5f1fb1efc2fb51de1fcc69b8ca7d09e2d2eecc Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Fri, 10 Mar 2017 01:38:04 -0800 Subject: [PATCH] revdep_rebuild/assign.py: handle directory symlinks (bug 611808) Use a _file_matcher class to make file comparisons work regardless of directory symlinks. X-Gentoo-bug: 611808 X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=611808 --- pym/gentoolkit/revdep_rebuild/assign.py | 67 +++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/pym/gentoolkit/revdep_rebuild/assign.py b/pym/gentoolkit/revdep_rebuild/assign.py index 00dda6e..84bd59f 100644 --- a/pym/gentoolkit/revdep_rebuild/assign.py +++ b/pym/gentoolkit/revdep_rebuild/assign.py @@ -6,6 +6,7 @@ Functions used for determining the package the broken lib belongs to. from __future__ import print_function +import errno import os import io import re @@ -22,11 +23,61 @@ try: except NameError: pass + +class _file_matcher(object): + """ + Compares files by basename and parent directory (device, inode), + so comparisons work regardless of directory symlinks. If a + parent directory does not exist, the realpath of the parent + directory is used instead of the (device, inode). When multiple + files share the same parent directory, stat is only called + once per directory, and the result is cached internally. + """ + def __init__(self): + self._file_ids = {} + self._added = {} + + def _file_id(self, filename): + try: + return self._file_ids[filename] + except KeyError: + try: + st = os.stat(filename) + except OSError as e: + if e.errno != errno.ENOENT: + raise + file_id = (os.path.realpath(filename),) + else: + file_id = (st.st_dev, st.st_ino) + + self._file_ids[filename] = file_id + return file_id + + def _file_key(self, filename): + head, tail = os.path.split(filename) + key = self._file_id(head) + (tail,) + return key + + def add(self, filename): + self._added[self._file_key(filename)] = filename + + def intersection(self, other): + for file_key in self._added: + match = other._added.get(file_key) + if match is not None: + yield match + + def assign_packages(broken, logger, settings): ''' Finds and returns packages that owns files placed in broken. Broken is list of files ''' stime = current_milli_time() + + broken_matcher = _file_matcher() + for filename in broken: + broken_matcher.add(filename) + assigned_pkgs = set() assigned_filenames = set() for group in os.listdir(settings['PKG_DIR']): @@ -39,21 +90,23 @@ def assign_packages(broken, logger, settings): continue f = pkgpath + '/CONTENTS' if os.path.exists(f): + contents_matcher = _file_matcher() try: with io.open(f, 'r', encoding='utf_8') as cnt: for line in cnt.readlines(): m = re.match('^obj (/[^ ]+)', line) if m is not None: - m = m.group(1) - if m in broken: - found = group+'/'+pkg - assigned_pkgs.add(found) - assigned_filenames.add(m) - logger.info('\t' + green('* ') + m + - ' -> ' + bold(found)) + contents_matcher.add(m.group(1)) except Exception as e: logger.warning(red(' !! Failed to read ' + f)) logger.warning(red(' !! Error was:' + str(e))) + else: + for m in contents_matcher.intersection(broken_matcher): + found = group+'/'+pkg + assigned_pkgs.add(found) + assigned_filenames.add(m) + logger.info('\t' + green('* ') + m + + ' -> ' + bold(found)) broken_filenames = set(broken) orphaned = broken_filenames.difference(assigned_filenames) -- 2.10.2