# Copyright 1999-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: $

EAPI="3"

inherit eutils autotools

DESCRIPTION="An OCR Engine that was developed at HP and now at Google"
HOMEPAGE="http://code.google.com/p/tesseract-ocr/"

# Language data is published in two forms, both handled below:
#   * ${P}.<lang>.tar.gz       - tarballs (installed from tesseract-ocr/tessdata)
#   * <lang>.traineddata.gz    - single gzip files (installed from ${WORKDIR})
#
# deu-frak is German set in the old "Fraktur" typeface; it is the same
# language (German, de), so it is fetched together with deu.
URI_PREFIX="http://tesseract-ocr.googlecode.com/files"
SRC_URI="${URI_PREFIX}/tesseract-${PV}.tar.gz
	${URI_PREFIX}/${P}.eng.tar.gz
	linguas_ar? ( ${URI_PREFIX}/${P}.ara.tar.gz )
	linguas_he? ( ${URI_PREFIX}/${P}.heb.tar.gz
		${URI_PREFIX}/${P}.heb-com.tar.gz )
	linguas_hi? ( ${URI_PREFIX}/${P}.hin.tar.gz )
	linguas_sk? ( ${URI_PREFIX}/${P}.slk-frak.tar.gz
		${URI_PREFIX}/slk.traineddata.gz )
	linguas_th? ( ${URI_PREFIX}/${P}.tha.tar.gz )
	linguas_bg? ( ${URI_PREFIX}/bul.traineddata.gz )
	linguas_ca? ( ${URI_PREFIX}/cat.traineddata.gz )
	linguas_cs? ( ${URI_PREFIX}/ces.traineddata.gz )
	linguas_da? ( ${URI_PREFIX}/dan.traineddata.gz )
	linguas_de? ( ${URI_PREFIX}/deu.traineddata.gz
		${URI_PREFIX}/deu-frak.traineddata.gz )
	linguas_el? ( ${URI_PREFIX}/ell.traineddata.gz )
	linguas_es? ( ${URI_PREFIX}/spa.traineddata.gz )
	linguas_fi? ( ${URI_PREFIX}/fin.traineddata.gz )
	linguas_fr? ( ${URI_PREFIX}/fra.traineddata.gz )
	linguas_id? ( ${URI_PREFIX}/ind.traineddata.gz )
	linguas_it? ( ${URI_PREFIX}/ita.traineddata.gz )
	linguas_hu? ( ${URI_PREFIX}/hun.traineddata.gz )
	linguas_ja? ( ${URI_PREFIX}/jpn.traineddata.gz )
	linguas_ko? ( ${URI_PREFIX}/kor.traineddata.gz )
	linguas_lt? ( ${URI_PREFIX}/lit.traineddata.gz )
	linguas_lv? ( ${URI_PREFIX}/lav.traineddata.gz )
	linguas_nl? ( ${URI_PREFIX}/nld.traineddata.gz )
	linguas_nb? ( ${URI_PREFIX}/nor.traineddata.gz )
	linguas_pl? ( ${URI_PREFIX}/pol.traineddata.gz )
	linguas_pt? ( ${URI_PREFIX}/por.traineddata.gz )
	linguas_ro? ( ${URI_PREFIX}/ron.traineddata.gz )
	linguas_ru? ( ${URI_PREFIX}/rus.traineddata.gz )
	linguas_sl? ( ${URI_PREFIX}/slv.traineddata.gz )
	linguas_sr? ( ${URI_PREFIX}/srp.traineddata.gz )
	linguas_sv? ( ${URI_PREFIX}/swe.traineddata.gz )
	linguas_tl? ( ${URI_PREFIX}/tgl.traineddata.gz )
	linguas_tr? ( ${URI_PREFIX}/tur.traineddata.gz )
	linguas_uk? ( ${URI_PREFIX}/ukr.traineddata.gz )
	linguas_vi? ( ${URI_PREFIX}/vie.traineddata.gz )
	linguas_zh_CN? ( ${URI_PREFIX}/chi_sim.traineddata.gz )
	linguas_zh_TW? ( ${URI_PREFIX}/chi_tra.traineddata.gz )"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~alpha ~amd64 ~mips ~ppc ~ppc64 ~sparc ~x86"
IUSE="examples jpeg png tiff -webp +scrollview
	linguas_ar linguas_bg linguas_ca linguas_cs linguas_da linguas_de
	linguas_el linguas_es linguas_fi linguas_fr linguas_he linguas_hi
	linguas_id linguas_it linguas_hu linguas_ja linguas_ko linguas_lt
	linguas_lv linguas_nl linguas_nb linguas_pl linguas_pt linguas_ro
	linguas_ru linguas_sl linguas_sk linguas_sr linguas_sv linguas_th
	linguas_tl linguas_tr linguas_uk linguas_vi linguas_zh_CN
	linguas_zh_TW"

# The image-format USE flags are passed straight through to leptonica.
DEPEND="media-libs/leptonica[zlib,tiff?,jpeg?,png?,webp?]"
RDEPEND="${DEPEND}"

# NOTES:
# English language files are always installed because tesseract uses them by
# default. That is a tesseract bug; if possible this workaround should be
# dropped.
# See bug 287373.

S="${WORKDIR}/tesseract-${PV}"

# Adjust the autotools inputs and regenerate the build system.
src_prepare() {
	# Drop the "-I m4" include from ACLOCAL_AMFLAGS before regenerating.
	sed -i '/ACLOCAL_AMFLAGS/s/ -I m4//' Makefile.am \
		|| die "sed on Makefile.am failed"
	eautoreconf
	# Remove the po/Makefile.in reference from configure.ac.
	# NOTE(review): this edit runs after eautoreconf, so the configure script
	# generated above was produced from the unedited configure.ac - confirm
	# that this ordering is intentional.
	sed -i 's!po/Makefile.in!!' configure.ac \
		|| die "sed on configure.ac failed"
}

# Configure the build; the scrollview USE flag toggles --enable-graphics.
src_configure() {
	# https://code.google.com/p/tesseract-ocr/issues/detail?id=707
	econf $(use_enable scrollview graphics)
}

# Install the program, its documentation, the language data and, with
# USE=examples, two sample images.
src_install() {
	emake DESTDIR="${D}" install || die "emake install failed"
	dodoc AUTHORS ChangeLog NEWS README ReleaseNotes || die "dodoc failed"

	insinto /usr/share/tessdata

	# Data from the *.traineddata.gz distfiles sits directly in ${WORKDIR}.
	# When no such language is enabled the glob stays unexpanded, so skip
	# anything that is not a regular file instead of handing doins a
	# non-existent path.
	local datafile
	for datafile in "${WORKDIR}"/*.traineddata; do
		[[ -f ${datafile} ]] || continue
		doins "${datafile}" || die "doins ${datafile##*/} failed"
	done

	# Data from the language tarballs (English is always among them).
	# Helpers do not abort on their own in this EAPI, so check explicitly.
	doins "${WORKDIR}"/tesseract-ocr/tessdata/* || die "doins tessdata failed"

	if use examples; then
		insinto /usr/share/doc/${PF}/examples
		doins eurotext.tif phototest.tif || die "doins failed"
	fi
}