Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 386988 Details for
Bug 525718
sys-apps/portage: emerge search actions should use an index to improve performance (like esearch)
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
emerge --search: use description index
emerge-search-use-description-index.patch (text/plain), 15.74 KB, created by
Zac Medico
on 2014-10-19 21:41:33 UTC
(
hide
)
Description:
emerge --search: use description index
Filename:
MIME Type:
Creator:
Zac Medico
Created:
2014-10-19 21:41:33 UTC
Size:
15.74 KB
patch
obsolete
>From 5192579f79da36f5a1ce5f3651c26ccb235cca28 Mon Sep 17 00:00:00 2001 >From: Zac Medico <zmedico@gentoo.org> >Date: Fri, 17 Oct 2014 17:38:59 -0700 >Subject: [PATCH] emerge --search: use description index > >This adds an egencache --update-pkg-desc-index action which generates >a plain-text index of package names, versions, and descriptions. The >index can then be used to optimize emerge --search / --searchdesc >actions. If the package description index is missing from a particular >repository, then all metadata for that repository is obtained using the >normal portdbapi.aux_get method. > >Searching of installed packages is optimized to take advantage of >vardbapi._aux_cache, which is backed by vdb_metadata.pickle. >See the IndexedVardb docstring for some more details. > >For users that would like to modify ebuilds in a repository without >running egencache afterwards, the new emerge --search-index < y | n > >option can be used to get non-indexed search. Alternatively, the user >could simply remove the stale index file, in order to disable the >search index for a particular repository. 
> >X-Gentoo-Bug: 525718 >X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 >--- > bin/egencache | 39 +++++++++- > man/egencache.1 | 4 + > man/emerge.1 | 8 ++ > man/portage.5 | 12 +++ > pym/_emerge/actions.py | 3 +- > pym/_emerge/main.py | 5 ++ > pym/_emerge/search.py | 207 +++++++++++++++++++++++++++++++++++++++++++++---- > 7 files changed, 261 insertions(+), 17 deletions(-) > >diff --git a/bin/egencache b/bin/egencache >index e366058..95cb1ad 100755 >--- a/bin/egencache >+++ b/bin/egencache >@@ -57,7 +57,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler > from portage.util._eventloop.global_event_loop import global_event_loop > from portage import cpv_getkey > from portage.dep import Atom, isjustname >-from portage.versions import pkgsplit, vercmp >+from portage.versions import pkgsplit, vercmp, _pkg_str > > try: > from xml.etree import ElementTree >@@ -91,6 +91,9 @@ def parse_args(args): > actions.add_argument("--update-changelogs", > action="store_true", > help="update the ChangeLog files from SCM logs") >+ actions.add_argument("--update-pkg-desc-index", >+ action="store_true", >+ help="update package description index") > actions.add_argument("--update-manifests", > action="store_true", > help="update manifests") >@@ -451,6 +454,31 @@ class GenCache(object): > if hasattr(trg_cache, '_prune_empty_dirs'): > trg_cache._prune_empty_dirs() > >+class GenPkgDescIndex(object): >+ def __init__(self, portdb, output_file): >+ self.returncode = os.EX_OK >+ self._portdb = portdb >+ self._output_file = output_file >+ >+ def run(self): >+ >+ portage.util.ensure_dirs(os.path.dirname(self._output_file)) >+ f = portage.util.atomic_ofstream(self._output_file, >+ encoding=_encodings["repo.content"]) >+ >+ portdb = self._portdb >+ for cp in portdb.cp_all(): >+ pkgs = portdb.cp_list(cp) >+ if not pkgs: >+ continue >+ desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"]) >+ >+ f.write("%s %s: %s\n" % (cp, >+ " ".join(_pkg_str(cpv).version >+ for 
cpv in pkgs), desc)) >+ >+ f.close() >+ > class GenUseLocalDesc(object): > def __init__(self, portdb, output=None, > preserve_comments=False): >@@ -893,7 +921,8 @@ def egencache_main(args): > local_config=False, env=env) > > if not (options.update or options.update_use_local_desc or >- options.update_changelogs or options.update_manifests): >+ options.update_changelogs or options.update_manifests or >+ options.update_pkg_desc_index): > parser.error('No action specified') > return 1 > >@@ -1057,6 +1086,12 @@ def egencache_main(args): > else: > ret.append(scheduler.returncode) > >+ if options.update_pkg_desc_index: >+ gen_index = GenPkgDescIndex(portdb, os.path.join( >+ repo_config.location, "metadata", "pkg_desc_index")) >+ gen_index.run() >+ ret.append(gen_index.returncode) >+ > if options.update_use_local_desc: > gen_desc = GenUseLocalDesc(portdb, > output=options.uld_output, >diff --git a/man/egencache.1 b/man/egencache.1 >index f71feb3..3a3197f 100644 >--- a/man/egencache.1 >+++ b/man/egencache.1 >@@ -19,6 +19,10 @@ for the details on package atom syntax. > .BR "\-\-update\-changelogs" > Update the ChangeLog files from SCM logs (supported only in git repos). > .TP >+.BR "\-\-update\-pkg\-desc\-index" >+Update the package description index which is located at >+\fImetadata/pkg_desc_index\fR in the repository. >+.TP > .BR "\-\-update\-use\-local\-desc" > Update the \fIprofiles/use.local.desc\fR file from metadata.xml. > .TP >diff --git a/man/emerge.1 b/man/emerge.1 >index 2264b58..efd5d41 100644 >--- a/man/emerge.1 >+++ b/man/emerge.1 >@@ -790,6 +790,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in > the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support > \fBHDEPEND\fR, this option affects only the former. > .TP >+.BR "\-\-search\-index < y | n >" >+Enable or disable indexed search for search actions. This option is >+enabled by default. 
The search index needs to be regenerated by >+\fBegencache\fR(1) after changes are made to a repository (see the >+\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added >+to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later >+overridden via the command line. >+.TP > .BR "\-\-select [ y | n ] (\-w short option)" > Add specified packages to the world set (inverse of > \fB\-\-oneshot\fR). This is useful if you want to >diff --git a/man/portage.5 b/man/portage.5 >index e399f0f..bf9457c 100644 >--- a/man/portage.5 >+++ b/man/portage.5 >@@ -75,6 +75,7 @@ user\-defined package sets > .BR /usr/portage/metadata/ > .nf > layout.conf >+pkg_desc_index > .fi > .TP > .BR /usr/portage/profiles/ >@@ -1110,6 +1111,17 @@ cache\-formats = md5-dict pms > profile\-formats = portage-2 > .fi > .RE >+.TP >+.BR pkg_desc_index >+This is an index of package names, versions, and descriptions which >+may be generated by \fBegencache\fR(1) in order to optimize >+\fBemerge\fR(1) search actions. 
>+ >+.I Example: >+.nf >+sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor >+sys-apps/usleep 0.1: A wrapper for usleep >+.fi > .RE > .TP > .BR /usr/portage/profiles/ >diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py >index 4e8b83b..a81212c 100644 >--- a/pym/_emerge/actions.py >+++ b/pym/_emerge/actions.py >@@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner): > searchinstance = search(root_config, > spinner, "--searchdesc" in myopts, > "--quiet" not in myopts, "--usepkg" in myopts, >- "--usepkgonly" in myopts) >+ "--usepkgonly" in myopts, >+ search_index = myopts.get("--search-index", "y") != "n") > for mysearch in myfiles: > try: > searchinstance.execute(mysearch) >diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py >index 3883f72..d403b36 100644 >--- a/pym/_emerge/main.py >+++ b/pym/_emerge/main.py >@@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False): > "choices" :("True", "rdeps") > }, > >+ "--search-index": { >+ "help": "Enable or disable indexed search (enabled by default)", >+ "choices": y_or_n >+ }, >+ > "--select": { > "shortopt" : "-w", > "help" : "add specified packages to the world set " + \ >diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py >index 4b0fd9f..37fee20 100644 >--- a/pym/_emerge/search.py >+++ b/pym/_emerge/search.py >@@ -3,13 +3,17 @@ > > from __future__ import print_function > >+import io > import re > import portage >-from portage import os >+from portage import os, _encodings > from portage.dbapi.porttree import _parse_uri_map >+from portage.dep import Atom >+from portage.exception import InvalidAtom, InvalidData > from portage.localization import localized_size > from portage.output import bold, bold as white, darkgreen, green, red > from portage.util import writemsg_stdout >+from portage.versions import _pkg_str > > from _emerge.Package import Package > >@@ -25,12 +29,11 @@ class search(object): > # public interface > # > def __init__(self, root_config, spinner, 
searchdesc, >- verbose, usepkg, usepkgonly): >+ verbose, usepkg, usepkgonly, search_index = True): > """Searches the available and installed packages for the supplied search key. > The list of available and installed packages is created at object instantiation. > This makes successive searches faster.""" > self.settings = root_config.settings >- self.vartree = root_config.trees["vartree"] > self.spinner = spinner > self.verbose = verbose > self.searchdesc = searchdesc >@@ -45,6 +48,10 @@ class search(object): > bindb = root_config.trees["bintree"].dbapi > vardb = root_config.trees["vartree"].dbapi > >+ if search_index: >+ portdb = IndexedPortdb(portdb) >+ vardb = IndexedVardb(vardb) >+ > if not usepkgonly and portdb._have_root_eclass_dir: > self._dbs.append(portdb) > >@@ -53,6 +60,7 @@ class search(object): > > self._dbs.append(vardb) > self._portdb = portdb >+ self._vardb = vardb > > def _spinner_update(self): > if self.spinner: >@@ -97,7 +105,7 @@ class search(object): > return {} > > def _visible(self, db, cpv, metadata): >- installed = db is self.vartree.dbapi >+ installed = db is self._vardb > built = installed or db is not self._portdb > pkg_type = "ebuild" > if installed: >@@ -208,6 +216,22 @@ class search(object): > masked=1 > self.matches["pkg"].append([package,masked]) > elif self.searchdesc: # DESCRIPTION searching >+ # Check for DESCRIPTION match first, so that we can skip >+ # the expensive visiblity check if it doesn't match. 
>+ full_package = self._xmatch("match-all", package) >+ if not full_package: >+ continue >+ full_package = full_package[-1] >+ try: >+ full_desc = self._aux_get( >+ full_package, ["DESCRIPTION"])[0] >+ except KeyError: >+ portage.writemsg( >+ "emerge: search: aux_get() failed, skipping\n", >+ noiselevel=-1) >+ continue >+ if not self.searchre.search(full_desc): >+ continue > full_package = self._xmatch("bestmatch-visible", package) > if not full_package: > #no match found; we don't want to query description >@@ -217,14 +241,8 @@ class search(object): > continue > else: > masked=1 >- try: >- full_desc = self._aux_get( >- full_package, ["DESCRIPTION"])[0] >- except KeyError: >- print("emerge: search: aux_get() failed, skipping") >- continue >- if self.searchre.search(full_desc): >- self.matches["desc"].append([full_package,masked]) >+ >+ self.matches["desc"].append((full_package, masked)) > > self.sdict = self.setconfig.getSets() > for setname in self.sdict: >@@ -262,7 +280,7 @@ class search(object): > bold(self.searchkey) + " ]\n") > msg.append("[ Applications found : " + \ > bold(str(self.mlen)) + " ]\n\n") >- vardb = self.vartree.dbapi >+ vardb = self._vardb > metadata_keys = set(Package.metadata_keys) > metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"]) > metadata_keys = tuple(metadata_keys) >@@ -372,7 +390,11 @@ class search(object): > # private interface > # > def getInstallationStatus(self,package): >- installed_package = self.vartree.dep_bestmatch(package) >+ installed_package = self._vardb.match(package) >+ if installed_package: >+ installed_package = installed_package[-1] >+ else: >+ installed_package = "" > result = "" > version = self.getVersion(installed_package,search.VERSION_RELEASE) > if len(version) > 0: >@@ -392,3 +414,160 @@ class search(object): > result = "" > return result > >+ >+class IndexedPortdb(object): >+ """ >+ A portdbapi interface that uses a package description index to >+ improve performance. 
If the description index is missing for a >+ particular repository, then all metadata for that repository is >+ obtained using the normal pordbapi.aux_get method. >+ """ >+ def __init__(self, portdb): >+ self._portdb = portdb >+ self.cpv_exists = portdb.cpv_exists >+ self.getFetchMap = portdb.getFetchMap >+ self.findname = portdb.findname >+ self._aux_cache_keys = portdb._aux_cache_keys >+ self._have_root_eclass_dir = portdb._have_root_eclass_dir >+ self._cpv_sort_ascending = portdb._cpv_sort_ascending >+ self._desc_cache = None >+ self._cp_map = None >+ >+ def _init_index(self): >+ cp_map = {} >+ desc_cache = {} >+ for repo_path in self._portdb.porttrees: >+ outside_repo = os.path.join(self._portdb.depcachedir, >+ repo_path.lstrip(os.sep)) >+ for parent_dir in (repo_path, outside_repo): >+ file_path = os.path.join(parent_dir, >+ "metadata", "pkg_desc_index") >+ >+ try: >+ with io.open(file_path, >+ encoding=_encodings["repo.content"]) as f: >+ for line in f: >+ try: >+ pkgs, desc = line.split(":", 1) >+ except ValueError: >+ continue >+ desc = desc.strip() >+ try: >+ cp, pkgs = pkgs.split(" ", 1) >+ except ValueError: >+ continue >+ if not cp: >+ continue >+ try: >+ atom = Atom(cp) >+ except InvalidAtom: >+ continue >+ if cp != atom.cp: >+ continue >+ cp_list = cp_map.get(cp) >+ if cp_list is None: >+ cp_list = [] >+ cp_map[cp] = cp_list >+ for ver in pkgs.split(): >+ try: >+ cpv = _pkg_str(cp + "-" + ver) >+ except InvalidData: >+ pass >+ else: >+ cp_list.append(cpv) >+ desc_cache[cpv] = desc >+ except IOError: >+ pass >+ else: >+ break >+ else: >+ # No descriptions index was found, so populate >+ # cp_map the slow way. 
>+ for cp in self._portdb.cp_all(trees=[repo_path]): >+ cp_list = cp_map.get(cp) >+ if cp_list is None: >+ cp_list = [] >+ cp_map[cp] = cp_list >+ for cpv in self._portdb.cp_list(cp, mytree=repo_path): >+ if cpv not in cp_list: >+ cp_list.append(_pkg_str(cpv)) >+ >+ self._desc_cache = desc_cache >+ self._cp_map = cp_map >+ >+ def cp_all(self): >+ if self._cp_map is None: >+ self._init_index() >+ return list(self._cp_map) >+ >+ def match(self, atom): >+ if not isinstance(atom, Atom): >+ atom = Atom(atom) >+ cp_list = self._cp_map.get(atom.cp) >+ if cp_list is None: >+ return [] >+ self._portdb._cpv_sort_ascending(cp_list) >+ return portage.match_from_list(atom, cp_list) >+ >+ def aux_get(self, cpv, attrs, myrepo = None): >+ if len(attrs) == 1 and attrs[0] == "DESCRIPTION": >+ try: >+ return [self._desc_cache[cpv]] >+ except KeyError: >+ pass >+ return self._portdb.aux_get(cpv, attrs) >+ >+ >+class IndexedVardb(object): >+ """ >+ A vardbapi interface that sacrifices validation in order to >+ improve performance. It takes advantage of vardbdbapi._aux_cache, >+ which is backed by vdb_metadata.pickle. Since _aux_cache is >+ not updated for every single merge/unmerge (see >+ _aux_cache_threshold), the list of packages is obtained directly >+ from the real vardbapi instance. If a package is missing from >+ _aux_cache, then its metadata is obtained using the normal >+ (validated) vardbapi.aux_get method. 
>+ """ >+ def __init__(self, vardb): >+ self._vardb = vardb >+ self._aux_cache_keys = vardb._aux_cache_keys >+ self._cpv_sort_ascending = vardb._cpv_sort_ascending >+ self._cp_map = {} >+ self.cpv_exists = vardb.cpv_exists >+ >+ def cp_all(self): >+ if self._cp_map: >+ return list(self._cp_map) >+ cp_map = self._cp_map >+ for cpv in self._vardb.cpv_all(): >+ cp = portage.cpv_getkey(cpv) >+ if cp is not None: >+ cp_list = cp_map.get(cp) >+ if cp_list is None: >+ cp_list = [] >+ cp_map[cp] = cp_list >+ cp_list.append(_pkg_str(cpv)) >+ return list(cp_map) >+ >+ def match(self, atom): >+ if not isinstance(atom, Atom): >+ atom = Atom(atom) >+ cp_list = self._cp_map.get(atom.cp) >+ if cp_list is None: >+ return [] >+ self._vardb._cpv_sort_ascending(cp_list) >+ return portage.match_from_list(atom, cp_list) >+ >+ def aux_get(self, cpv, attrs, myrepo = None): >+ pkg_data = self._vardb._aux_cache["packages"].get(cpv) >+ if not isinstance(pkg_data, tuple) or \ >+ len(pkg_data) != 2 or \ >+ not isinstance(pkg_data[1], dict): >+ pkg_data = None >+ if pkg_data is None: >+ # It may be missing from _aux_cache due to >+ # _aux_cache_threshold. >+ return self._vardb.aux_get(cpv, attrs) >+ metadata = pkg_data[1] >+ return [metadata.get(k, "") for k in attrs] >-- >2.0.4 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 525718
:
386860
|
386866
| 386988