--- src/CLucene/util/streambase_orig.h 2006-07-01 16:18:43.000000000 +0200 +++ src/CLucene/util/streambase.h 2006-09-28 00:51:02.000000000 +0200 @@ -1,9 +1,22 @@ -/*------------------------------------------------------------------------------ -* Copyright (C) 2003-2006 Jos van den Oever -* -* Distributable under the terms of either the Apache License (Version 2.0) or -* the GNU Lesser General Public License, as specified in the COPYING file. -------------------------------------------------------------------------------*/ +/* This file is part of Strigi Desktop Search + * + * Copyright (C) 2006 Jos van den Oever + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ #ifndef STREAMBASE_H #define STREAMBASE_H @@ -17,13 +30,14 @@ /** * @short Base class for stream read access to many different file types. - * + * * This class is based on the interface java.io.InputStream. It allows * for uniform access to streamed resources. * The main difference with the java equivalent is a performance improvement. * When reading data, data is not copied into a buffer provided by the caller, - * but a pointer to the read data is provided. This makes this interface especially - * useful for deriving from it and implementing filterers or transformers. 
+ * but a pointer to the read data is provided. This makes this interface + * especially useful for deriving from it and implementing filterers or + * transformers. */ // java mapping: long=int64, int=int32, byte=uint8_t template @@ -37,29 +51,24 @@ StreamBase() :size(-1), position(0), status(Ok){ } virtual ~StreamBase(){} /** - * Return a string representation of the last error that has occurred. + * @brief Return a string representation of the last error. * If no error has occurred, an empty string is returned. **/ const char* getError() const { return error.c_str(); } StreamStatus getStatus() const { return status; } + /** + * @brief Get the current position in the stream. + * The value obtained from this function can be used to reset the stream. + **/ int64_t getPosition() const { return position; } - int64_t getSize() const { return size; } - /** - * @brief Reads @p ntoread characters from the stream and sets \a start to - * the first character that was read. - * - * If @p ntoread is @c 0, then at least one character will be read. - * - * @param start Pointer passed by reference that will be set to point to - * the retrieved array of characters. If the end of the stream - * is encountered or an error occurs, the value of @p start - * is undefined. - * @return the number of characters that were read. If 0 is returned, the - * end of the stream has been reached. If -1 is returned, an error - * has occured. + /** + * @brief Return the size of the stream. + * If the size of the stream is unknown, -1 + * is returned. If the end of the stream has been reached the size is + * always known. **/ - // virtual int32_t read(const T*& start) = 0; - /** + int64_t getSize() const { return size; } + /** * @brief Reads characters from the stream and sets \a start to * the first character that was read. * @@ -73,61 +82,34 @@ * @p is @c 0 the stream reads at least 1 character. * @return the number of characters that were read. 
If -1 is returned, the * end of the stream has been reached. If -2 is returned, an error - * has occured. + * has occurred. **/ virtual int32_t read(const T*& start, int32_t min, int32_t max) = 0; /** - * Same as read(const T*& start, int32_t ntoread), but may read more. - **/ -// virtual int32_t readAtLeast(const T*& start, int32_t ntoread) = 0; - /* the available value may be greater than the actual value if - the encoding is a variable one (such as utf8 or unicode) */ - /** * Skip @param ntoskip bytes. Unless an error occurs or the end of file is * encountered, this amount of bytes is skipped. - * The optional @param skipped can be use to find out how many bites were skipped. - * If the end of stream is reached, Eof is returned. - * If an error occured, Error is returned. + * This function returns the new position in the stream. **/ virtual int64_t skip(int64_t ntoskip); - /** - * \short Marks the current position in this input stream. - * A subsequent call to the reset method repositions this stream at the - * last marked position so that subsequent reads re-read the same bytes. - * - * The readlimit arguments tells this input stream to allow that many - * bytes to be read before the mark position gets invalidated. - * The stream somehow remembers all the bytes read after the call to mark - * and stands ready to supply those same bytes again if and whenever the - * method reset is called. However, the stream is not required to remember - * any data at all if more than readlimit bytes are read from the stream - * before reset is called. - * - * When calling the method mark more than once at the same position in the - * stream, the call with the largest value for \p readlimit is defining. - **/ - virtual int64_t mark(int32_t readlimit) = 0; /** - * \short Repositions this stream to given requested position. 
- * The general contract of reset is: - * - Reset is guaranteed to work after a successfull call to read(), - * when new position is in the range of the data returned by read(). - * This means that @p pos must lie between than the position - * corresponding to the @p start parameter (x) of the @r read function - * and the position corresponding to the last position in the returned - * buffer (x + @p nread). - * if If the method mark has not been called since the stream was created, - * or the number of bytes read from the stream since mark was last - * called is larger than the argument to mark at that last call, then - * Error is returned. - * - Otherwise the stream is reset to a state such that all the bytes - * read since the most recent call to mark (or since the start of the - * file, if mark has not been called) will be resupplied to subsequent - * callers of the read method, followed by any bytes that otherwise - * would have been the next input data as of the time of the call to - * reset. + * @brief Repositions this stream to the requested position. + * Reset is guaranteed to work after a successful call to read(), + * when the new position is in the range of the data returned by read(). + * This means that @p pos must lie between the position + * corresponding to the @p start parameter (x) of the @c read function + * and the position corresponding to the last position in the returned + * buffer (x + @p nread). 
**/ virtual int64_t reset(int64_t pos) = 0; + /** + * deprecated function + **/ + int64_t mark(int32_t readlimit) { + int64_t p = getPosition(); + const T* ptr; + read(ptr, readlimit, -1); + return reset(p); + } }; #define SKIPSTEP 1024 template --- src/CLucene/util/bufferedstream_orig.h 2006-07-01 16:18:42.000000000 +0200 +++ src/CLucene/util/bufferedstream.h 2006-09-28 00:51:09.000000000 +0200 @@ -1,23 +1,28 @@ -/** - * Copyright 2003-2006 The Apache Software Foundation +/* This file is part of Strigi Desktop Search * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (C) 2006 Jos van den Oever * - * http://www.apache.org/licenses/LICENSE-2.0 + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
*/ #ifndef BUFFEREDSTREAM_H #define BUFFEREDSTREAM_H #include "streambase.h" #include "inputstreambuffer.h" +#include <cassert> namespace jstreams { @@ -41,10 +46,9 @@ virtual int32_t fillBuffer(T* start, int32_t space) = 0; // this function might be useful if you want to reuse a bufferedstream void resetBuffer() {printf("implement 'resetBuffer'\n");} -public: BufferedInputStream(); +public: int32_t read(const T*& start, int32_t min, int32_t max); - int64_t mark(int32_t readlimit); int64_t reset(int64_t); virtual int64_t skip(int64_t ntoskip); }; @@ -64,6 +68,7 @@ space = buffer.makeSpace(missing); T* start = buffer.readPos + buffer.avail; nwritten = fillBuffer(start, space); + assert(StreamBase::status != Eof); if (nwritten > 0) { buffer.avail += nwritten; missing = ntoread - buffer.avail; @@ -84,20 +89,25 @@ // do we have enough space in the buffer? writeToBuffer(min); if (StreamBase::status == Error) return -2; - if (StreamBase::status == Eof) return -1; } int32_t nread = buffer.read(start, max); -/* if (nread == 0) { - printf("bis: start %p min %i max %i nread %i avail %i bsize %i pos %lli size %lli\n", - start, min, max, nread, buffer.avail, buffer.size, BufferedInputStream::position, BufferedInputStream::size); - printf("buf: start %p readpos %p marpos %p\n", buffer.start, buffer.readPos, buffer.markPos); - }*/ BufferedInputStream::position += nread; - if (BufferedInputStream::status == Ok && buffer.avail == 0 + if (BufferedInputStream::position > BufferedInputStream::size + && BufferedInputStream::size > 0) { + // error: we read more than was specified in size + // this is an error because all dependent code might have been labouring + // under a misapprehension + BufferedInputStream::status = Error; + BufferedInputStream::error = "Stream is longer than specified."; + nread = -2; + } else if (BufferedInputStream::status == Ok && buffer.avail == 0 && finishedWritingToBuffer) { BufferedInputStream::status = Eof; + if (BufferedInputStream::size == -1) { + 
BufferedInputStream::size = BufferedInputStream::position; + } // save one call to read() by already returning -1 if no data is there if (nread == 0) nread = -1; } @@ -105,12 +115,6 @@ } template int64_t -BufferedInputStream::mark(int32_t readlimit) { - buffer.mark(readlimit); - return StreamBase::position; -} -template -int64_t BufferedInputStream::reset(int64_t newpos) { if (StreamBase::status == Error) return -2; // check to see if we have this position --- src/CLucene/util/inputstreambuffer_orig.h 2006-07-01 16:18:43.000000000 +0200 +++ src/CLucene/util/inputstreambuffer.h 2006-09-28 00:51:18.000000000 +0200 @@ -1,17 +1,21 @@ -/** - * Copyright 2003-2006 The Apache Software Foundation +/* This file is part of Strigi Desktop Search * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (C) 2006 Jos van den Oever * - * http://www.apache.org/licenses/LICENSE-2.0 + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. */ #ifndef INPUTSTREAMBUFFER_H #define INPUTSTREAMBUFFER_H @@ -28,14 +32,10 @@ int32_t size; T* readPos; int32_t avail; - T* markPos; - int32_t markLimit; InputStreamBuffer(); ~InputStreamBuffer(); void setSize(int32_t size); - void mark(int32_t readlimit); - void reset(); int32_t read(const T*& start, int32_t max=0); /** @@ -47,7 +47,7 @@ template InputStreamBuffer::InputStreamBuffer() { - markPos = readPos = start = 0; + readPos = start = 0; size = avail = 0; } template @@ -59,7 +59,6 @@ InputStreamBuffer::setSize(int32_t size) { // store pointer information int32_t offset = readPos - start; - int32_t markOffset = (markPos) ? markPos - start : -1; // allocate memory in the buffer start = (T*)realloc(start, size*sizeof(T)); @@ -67,52 +66,6 @@ // restore pointer information readPos = start + offset; - markPos = (markOffset == -1) ?0 :start + markOffset; -} -template -void -InputStreamBuffer::mark(int32_t limit) { - // if there's no buffer yet, allocate one now - if (start == 0) { - setSize(limit+1); - } - // if we had a larger limit defined for the same position, do nothing - if (readPos == markPos && limit <= markLimit) { - return; - } - - markLimit = limit; - // if we have enough room, only set the mark - int32_t offset = readPos - start; - if (size - offset >= limit) { - markPos = readPos; - return; - } - - // if we don't have enough room start by - // moving memory to the start of the buffer - if (readPos != start) { - memmove(start, readPos, avail*sizeof(T)); - readPos = start; - } - - // if we have enough room now, finish - if (size >= limit) { - markPos = readPos; - return; - } - - // last resort: increase buffer size - setSize(limit+1); - markPos = readPos; -} -template -void -InputStreamBuffer::reset() 
{ - if (markPos != 0) { - avail += readPos - markPos; - readPos = markPos; - } } template int32_t @@ -124,28 +77,16 @@ return space; } - if (markPos && readPos - markPos <= markLimit) { - // move data to the start of the buffer while respecting the set mark - if (markPos != start) { -// printf("moving with mark\n"); - int32_t n = avail + readPos - markPos; - memmove(start, markPos, n*sizeof(T)); - readPos -= markPos - start; - space += markPos - start; - markPos = start; - } - } else if (avail) { + if (avail) { if (readPos != start) { // printf("moving\n"); // move data to the start of the buffer memmove(start, readPos, avail*sizeof(T)); space += readPos - start; readPos = start; - markPos = 0; } } else { // we may start writing at the start of the buffer - markPos = 0; readPos = start; space = size; }