Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 386860 Details for
Bug 525718
sys-apps/portage: emerge search actions should use an index to improve performance (like esearch)
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
emerge --search: use description index
emerge-search-use-description-index.patch (text/plain), 13.09 KB, created by
Zac Medico
on 2014-10-18 03:19:11 UTC
(
hide
)
Description:
emerge --search: use description index
Filename:
MIME Type:
Creator:
Zac Medico
Created:
2014-10-18 03:19:11 UTC
Size:
13.09 KB
patch
obsolete
>From 34eb27f314a7b67a922623adb226f33f5b399ef7 Mon Sep 17 00:00:00 2001 >From: Zac Medico <zmedico@gentoo.org> >Date: Fri, 17 Oct 2014 17:38:59 -0700 >Subject: [PATCH] emerge --search: use description index > >This adds an egencache --update-pkg-desc-index action which generates >a plain-text index of package names, versions, and descriptions. The >index can then be used to optimize emerge --search / --searchdesc >actions. If the package description index is missing from a particular >repository, then all metadata for that repository is obtained using the >normal portdbapi.aux_get method. > >Searching of installed packages is optimized to take advantage of >vardbapi._aux_cache, which is backed by vardb_metadata.pickle. >See the IndexedVardb docstring for some more details. > >X-Gentoo-Bug: 525718 >X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 >--- > bin/egencache | 43 ++++++++++- > man/egencache.1 | 4 ++ > man/portage.5 | 6 ++ > pym/_emerge/search.py | 196 ++++++++++++++++++++++++++++++++++++++++++++++---- > 4 files changed, 232 insertions(+), 17 deletions(-) > >diff --git a/bin/egencache b/bin/egencache >index e366058..90d5e68 100755 >--- a/bin/egencache >+++ b/bin/egencache >@@ -57,7 +57,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler > from portage.util._eventloop.global_event_loop import global_event_loop > from portage import cpv_getkey > from portage.dep import Atom, isjustname >-from portage.versions import pkgsplit, vercmp >+from portage.versions import pkgsplit, vercmp, _pkg_str > > try: > from xml.etree import ElementTree >@@ -91,6 +91,9 @@ def parse_args(args): > actions.add_argument("--update-changelogs", > action="store_true", > help="update the ChangeLog files from SCM logs") >+ actions.add_argument("--update-pkg-desc-index", >+ action="store_true", >+ help="update package description index") > actions.add_argument("--update-manifests", > action="store_true", > help="update manifests") >@@ -451,6 +454,35 @@ 
class GenCache(object): > if hasattr(trg_cache, '_prune_empty_dirs'): > trg_cache._prune_empty_dirs() > >+class GenPkgDescIndex(object): >+ def __init__(self, portdb, output_file): >+ self.returncode = os.EX_OK >+ self._portdb = portdb >+ self._output_file = output_file >+ >+ def run(self): >+ >+ portage.util.ensure_dirs(os.path.dirname(self._output_file)) >+ f = portage.util.atomic_ofstream(self._output_file, >+ encoding=_encodings["repo.content"]) >+ >+ portdb = self._portdb >+ for cp in portdb.cp_all(): >+ pkgs = portdb.cp_list(cp) >+ if not pkgs: >+ continue >+ desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"]) >+ >+ if len(pkgs) == 1: >+ output = "%s: %s\n" % (pkgs[0], desc) >+ else: >+ output = "%s,%s: %s\n" % (pkgs[0], >+ ",".join(_pkg_str(cpv).version >+ for cpv in pkgs[1:]), desc) >+ f.write(output) >+ >+ f.close() >+ > class GenUseLocalDesc(object): > def __init__(self, portdb, output=None, > preserve_comments=False): >@@ -893,7 +925,8 @@ def egencache_main(args): > local_config=False, env=env) > > if not (options.update or options.update_use_local_desc or >- options.update_changelogs or options.update_manifests): >+ options.update_changelogs or options.update_manifests or >+ options.update_pkg_desc_index): > parser.error('No action specified') > return 1 > >@@ -1057,6 +1090,12 @@ def egencache_main(args): > else: > ret.append(scheduler.returncode) > >+ if options.update_pkg_desc_index: >+ gen_index = GenPkgDescIndex(portdb, os.path.join( >+ repo_config.location, "metadata", "pkg_desc_index")) >+ gen_index.run() >+ ret.append(gen_index.returncode) >+ > if options.update_use_local_desc: > gen_desc = GenUseLocalDesc(portdb, > output=options.uld_output, >diff --git a/man/egencache.1 b/man/egencache.1 >index f71feb3..3a3197f 100644 >--- a/man/egencache.1 >+++ b/man/egencache.1 >@@ -19,6 +19,10 @@ for the details on package atom syntax. > .BR "\-\-update\-changelogs" > Update the ChangeLog files from SCM logs (supported only in git repos). 
> .TP >+.BR "\-\-update\-pkg\-desc\-index" >+Update the package description index which is located at >+\fImetadata/pkg_desc_index\fR in the repository. >+.TP > .BR "\-\-update\-use\-local\-desc" > Update the \fIprofiles/use.local.desc\fR file from metadata.xml. > .TP >diff --git a/man/portage.5 b/man/portage.5 >index e399f0f..26856d1 100644 >--- a/man/portage.5 >+++ b/man/portage.5 >@@ -75,6 +75,7 @@ user\-defined package sets > .BR /usr/portage/metadata/ > .nf > layout.conf >+pkg_desc_index > .fi > .TP > .BR /usr/portage/profiles/ >@@ -1110,6 +1111,11 @@ cache\-formats = md5-dict pms > profile\-formats = portage-2 > .fi > .RE >+.TP >+.BR pkg_desc_index >+This is an index of packages and descriptions which may be generated >+by \fBegencache\fR(1) in order to optimize \fBemerge\fR(1) search >+actions. > .RE > .TP > .BR /usr/portage/profiles/ >diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py >index 4b0fd9f..bf15f11 100644 >--- a/pym/_emerge/search.py >+++ b/pym/_emerge/search.py >@@ -3,13 +3,17 @@ > > from __future__ import print_function > >+import io > import re > import portage >-from portage import os >+from portage import os, _encodings > from portage.dbapi.porttree import _parse_uri_map >+from portage.dep import Atom >+from portage.exception import InvalidData > from portage.localization import localized_size > from portage.output import bold, bold as white, darkgreen, green, red > from portage.util import writemsg_stdout >+from portage.versions import _pkg_str > > from _emerge.Package import Package > >@@ -30,7 +34,6 @@ class search(object): > The list of available and installed packages is created at object instantiation. 
> This makes successive searches faster.""" > self.settings = root_config.settings >- self.vartree = root_config.trees["vartree"] > self.spinner = spinner > self.verbose = verbose > self.searchdesc = searchdesc >@@ -41,9 +44,9 @@ class search(object): > > self._dbs = [] > >- portdb = root_config.trees["porttree"].dbapi >+ portdb = IndexedPortdb(root_config.trees["porttree"].dbapi) > bindb = root_config.trees["bintree"].dbapi >- vardb = root_config.trees["vartree"].dbapi >+ vardb = IndexedVardb(root_config.trees["vartree"].dbapi) > > if not usepkgonly and portdb._have_root_eclass_dir: > self._dbs.append(portdb) >@@ -53,6 +56,7 @@ class search(object): > > self._dbs.append(vardb) > self._portdb = portdb >+ self._vardb = vardb > > def _spinner_update(self): > if self.spinner: >@@ -97,7 +101,7 @@ class search(object): > return {} > > def _visible(self, db, cpv, metadata): >- installed = db is self.vartree.dbapi >+ installed = db is self._vardb > built = installed or db is not self._portdb > pkg_type = "ebuild" > if installed: >@@ -208,6 +212,20 @@ class search(object): > masked=1 > self.matches["pkg"].append([package,masked]) > elif self.searchdesc: # DESCRIPTION searching >+ # Check for DESCRIPTION match first, so that we can skip >+ # the expensive visiblity check if it doesn't match. 
>+ full_package = portage.best( >+ self._xmatch("match-all", package)) >+ try: >+ full_desc = self._aux_get( >+ full_package, ["DESCRIPTION"])[0] >+ except KeyError: >+ portage.writemsg( >+ "emerge: search: aux_get() failed, skipping\n", >+ noiselevel=-1) >+ continue >+ if not self.searchre.search(full_desc): >+ continue > full_package = self._xmatch("bestmatch-visible", package) > if not full_package: > #no match found; we don't want to query description >@@ -217,14 +235,8 @@ class search(object): > continue > else: > masked=1 >- try: >- full_desc = self._aux_get( >- full_package, ["DESCRIPTION"])[0] >- except KeyError: >- print("emerge: search: aux_get() failed, skipping") >- continue >- if self.searchre.search(full_desc): >- self.matches["desc"].append([full_package,masked]) >+ >+ self.matches["desc"].append((full_package, masked)) > > self.sdict = self.setconfig.getSets() > for setname in self.sdict: >@@ -262,7 +274,7 @@ class search(object): > bold(self.searchkey) + " ]\n") > msg.append("[ Applications found : " + \ > bold(str(self.mlen)) + " ]\n\n") >- vardb = self.vartree.dbapi >+ vardb = self._vardb > metadata_keys = set(Package.metadata_keys) > metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"]) > metadata_keys = tuple(metadata_keys) >@@ -372,7 +384,11 @@ class search(object): > # private interface > # > def getInstallationStatus(self,package): >- installed_package = self.vartree.dep_bestmatch(package) >+ installed_package = self._vardb.match(package) >+ if installed_package: >+ installed_package = installed_package[-1] >+ else: >+ installed_package = "" > result = "" > version = self.getVersion(installed_package,search.VERSION_RELEASE) > if len(version) > 0: >@@ -392,3 +408,153 @@ class search(object): > result = "" > return result > >+ >+class IndexedPortdb(object): >+ """ >+ A portdbapi interface that uses a package description index to >+ improve performance. 
If the description index is missing for a >+ particular repository, then all metadata for that repository is >+ obtained using the normal portdbapi.aux_get method. >+ """ >+ def __init__(self, portdb): >+ self._portdb = portdb >+ self.cpv_exists = portdb.cpv_exists >+ self.getFetchMap = portdb.getFetchMap >+ self.findname = portdb.findname >+ self._aux_cache_keys = portdb._aux_cache_keys >+ self._have_root_eclass_dir = portdb._have_root_eclass_dir >+ self._cpv_sort_ascending = portdb._cpv_sort_ascending >+ self._desc_cache = None >+ self._cp_map = None >+ >+ def _init_index(self): >+ cp_map = {} >+ desc_cache = {} >+ for repo_path in self._portdb.porttrees: >+ outside_repo = os.path.join(self._portdb.depcachedir, >+ repo_path.lstrip(os.sep)) >+ for parent_dir in (repo_path, outside_repo): >+ file_path = os.path.join(parent_dir, >+ "metadata", "pkg_desc_index") >+ >+ try: >+ with io.open(file_path, >+ encoding=_encodings["repo.content"]) as f: >+ for line in f: >+ pkgs, desc = line.split(":", 1) >+ desc = desc.strip() >+ pkgs = pkgs.split(",") >+ if not pkgs[0]: >+ continue >+ try: >+ pkg = _pkg_str(pkgs[0]) >+ except InvalidData: >+ continue >+ cp_list = cp_map.get(pkg.cp) >+ if cp_list is None: >+ cp_list = [] >+ cp_map[pkg.cp] = cp_list >+ cp_list.append(pkg) >+ for ver in pkgs[1:]: >+ try: >+ cp_list.append( >+ _pkg_str(pkg.cp + "-" + ver)) >+ except InvalidData: >+ pass >+ for cpv in cp_list: >+ desc_cache[cpv] = desc >+ except IOError: >+ pass >+ else: >+ break >+ else: >+ # No descriptions index was found, so populate >+ # cp_map the slow way. 
>+ for cp in self._portdb.cp_all(trees=[repo_path]): >+ cp_list = cp_map.get(cp) >+ if cp_list is None: >+ cp_list = [] >+ cp_map[cp] = cp_list >+ for cpv in self._portdb.cp_list(cp, mytree=repo_path): >+ if cpv not in cp_list: >+ cp_list.append(_pkg_str(cpv)) >+ >+ self._desc_cache = desc_cache >+ self._cp_map = cp_map >+ >+ def cp_all(self): >+ if self._cp_map is None: >+ self._init_index() >+ return list(self._cp_map) >+ >+ def match(self, atom): >+ if not isinstance(atom, Atom): >+ atom = Atom(atom) >+ cp_list = self._cp_map.get(atom.cp) >+ if cp_list is None: >+ return [] >+ self._portdb._cpv_sort_ascending(cp_list) >+ return portage.match_from_list(atom, cp_list) >+ >+ def aux_get(self, cpv, attrs, myrepo = None): >+ if len(attrs) == 1 and attrs[0] == "DESCRIPTION": >+ try: >+ return [self._desc_cache[cpv]] >+ except KeyError: >+ pass >+ return self._portdb.aux_get(cpv, attrs) >+ >+ >+class IndexedVardb(object): >+ """ >+ A vardbapi interface that sacrifices validation in order to >+ improve performance. It takes advantage of vardbapi._aux_cache, >+ which is backed by vardb_metadata.pickle. Since _aux_cache is >+ not updated for every single merge/unmerge (see >+ _aux_cache_threshold), the list of packages is obtained directly >+ from the real vardbapi instance. If a package is missing from >+ _aux_cache, then its metadata is obtained using the normal >+ (validated) vardbapi.aux_get method. 
>+ """ >+ def __init__(self, vardb): >+ self._vardb = vardb >+ self._aux_cache_keys = vardb._aux_cache_keys >+ self._cpv_sort_ascending = vardb._cpv_sort_ascending >+ self._cp_map = {} >+ self.cpv_exists = vardb.cpv_exists >+ >+ def cp_all(self): >+ if self._cp_map: >+ return list(self._cp_map) >+ cp_map = self._cp_map >+ for cpv in self._vardb.cpv_all(): >+ cp = portage.cpv_getkey(cpv) >+ if cp is not None: >+ cp_list = cp_map.get(cp) >+ if cp_list is None: >+ cp_list = [] >+ cp_map[cp] = cp_list >+ cp_list.append(_pkg_str(cpv)) >+ return list(cp_map) >+ >+ def match(self, atom): >+ if not isinstance(atom, Atom): >+ atom = Atom(atom) >+ cp_list = self._cp_map.get(atom.cp) >+ if cp_list is None: >+ return [] >+ self._vardb._cpv_sort_ascending(cp_list) >+ return portage.match_from_list(atom, cp_list) >+ >+ def aux_get(self, cpv, attrs, myrepo = None): >+ pkg_data = self._vardb._aux_cache["packages"].get(cpv) >+ if not isinstance(pkg_data, tuple) or \ >+ len(pkg_data) != 2 or \ >+ not isinstance(pkg_data[1], dict): >+ pkg_data = None >+ if pkg_data is None: >+ # It may be missing from _aux_cache due to >+ # _aux_cache_threshold. >+ return self._vardb.aux_get(cpv, attrs) >+ metadata = pkg_data[1] >+ return [metadata.get(k, "") for k in attrs] >-- >2.0.4 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 525718
:
386860
|
386866
|
386988