Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 149362
Collapse All | Expand All

(-)src/CLucene/util/streambase_orig.h (-71 / +53 lines)
Lines 1-9 Link Here
1
/*------------------------------------------------------------------------------
1
/* This file is part of Strigi Desktop Search
2
* Copyright (C) 2003-2006 Jos van den Oever
2
 *
3
* 
3
 * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
4
* Distributable under the terms of either the Apache License (Version 2.0) or 
4
 *
5
* the GNU Lesser General Public License, as specified in the COPYING file.
5
 * This library is free software; you can redistribute it and/or
6
------------------------------------------------------------------------------*/
6
 * modify it under the terms of the GNU Library General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Library General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Library General Public License
16
 * along with this library; see the file COPYING.LIB.  If not, write to
17
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
 * Boston, MA 02110-1301, USA.
19
 */
7
#ifndef STREAMBASE_H
20
#ifndef STREAMBASE_H
8
#define STREAMBASE_H
21
#define STREAMBASE_H
9
22
Lines 17-29 Link Here
17
30
18
/**
31
/**
19
 * @short Base class for stream read access to many different file types.
32
 * @short Base class for stream read access to many different file types.
20
 * 
33
 *
21
 * This class is based on the interface java.io.InputStream. It allows
34
 * This class is based on the interface java.io.InputStream. It allows
22
 * for uniform access to streamed resources.
35
 * for uniform access to streamed resources.
23
 * The main difference with the java equivalent is a performance improvement.
36
 * The main difference with the java equivalent is a performance improvement.
24
 * When reading data, data is not copied into a buffer provided by the caller,
37
 * When reading data, data is not copied into a buffer provided by the caller,
25
 * but a pointer to the read data is provided. This makes this interface especially
38
 * but a pointer to the read data is provided. This makes this interface
26
 * useful for deriving from it and implementing filterers or transformers.
39
 * especially useful for deriving from it and implementing filterers or
40
 * transformers.
27
 */
41
 */
28
// java mapping: long=int64, int=int32, byte=uint8_t
42
// java mapping: long=int64, int=int32, byte=uint8_t
29
template <class T>
43
template <class T>
Lines 37-65 Link Here
37
    StreamBase() :size(-1), position(0), status(Ok){ }
51
    StreamBase() :size(-1), position(0), status(Ok){ }
38
    virtual ~StreamBase(){}
52
    virtual ~StreamBase(){}
39
    /**
53
    /**
40
     * Return a string representation of the last error that has occurred.
54
     * @brief  Return a string representation of the last error.
41
     * If no error has occurred, an empty string is returned.
55
     * If no error has occurred, an empty string is returned.
42
     **/
56
     **/
43
    const char* getError() const { return error.c_str(); }
57
    const char* getError() const { return error.c_str(); }
44
    StreamStatus getStatus() const { return status; }
58
    StreamStatus getStatus() const { return status; }
59
    /**
60
     * @brief Get the current position in the stream.
61
     * The value obtained from this function can be used to reset the stream.
62
     **/
45
    int64_t getPosition() const { return position; }
63
    int64_t getPosition() const { return position; }
46
    int64_t getSize() const { return size; }
64
    /**
47
    /** 
65
     * @brief Return the size of the stream.
48
     * @brief Reads @p ntoread characters from the stream and sets \a start to
66
     * If the size of the stream is unknown, -1
49
     * the first character that was read.
67
     * is returned. If the end of the stream has been reached the size is
50
     *
68
     * always known.
51
     * If @p ntoread is @c 0, then at least one character will be read.
52
     *
53
     * @param start Pointer passed by reference that will be set to point to
54
     *              the retrieved array of characters. If the end of the stream
55
     *              is encountered or an error occurs, the value of @p start
56
     *              is undefined.
57
     * @return the number of characters that were read. If 0 is returned, the
58
     *         end of the stream has been reached. If -1 is returned, an error
59
     *         has occured.
60
     **/
69
     **/
61
 //   virtual int32_t read(const T*& start) = 0;
70
    int64_t getSize() const { return size; }
62
    /** 
71
    /**
63
     * @brief Reads characters from the stream and sets \a start to
72
     * @brief Reads characters from the stream and sets \a start to
64
     * the first character that was read.
73
     * the first character that was read.
65
     *
74
     *
Lines 73-133 Link Here
73
     *                @p is @c 0 the stream reads at least 1 character.
82
     *                @p is @c 0 the stream reads at least 1 character.
74
     * @return the number of characters that were read. If -1 is returned, the
83
     * @return the number of characters that were read. If -1 is returned, the
75
     *         end of the stream has been reached. If -2 is returned, an error
84
     *         end of the stream has been reached. If -2 is returned, an error
76
     *         has occured.
85
     *         has occurred.
77
     **/
86
     **/
78
    virtual int32_t read(const T*& start, int32_t min, int32_t max) = 0;
87
    virtual int32_t read(const T*& start, int32_t min, int32_t max) = 0;
79
    /**
88
    /**
80
     * Same as read(const T*& start, int32_t ntoread), but may read more.
81
     **/
82
//    virtual int32_t readAtLeast(const T*& start, int32_t ntoread) = 0;
83
    /* the available value may be greater than the actual value if
84
      the encoding is a variable one (such as utf8 or unicode) */
85
    /**
86
     * Skip @param ntoskip bytes. Unless an error occurs or the end of file is
89
     * Skip @param ntoskip bytes. Unless an error occurs or the end of file is
87
     * encountered, this amount of bytes is skipped.
90
     * encountered, this amount of bytes is skipped.
88
     * The optional @param skipped can be use to find out how many bites were skipped.
91
     * This function returns new position in the stream.
89
     * If the end of stream is reached, Eof is returned.
90
     * If an error occured, Error is returned.
91
     **/
92
     **/
92
    virtual int64_t skip(int64_t ntoskip);
93
    virtual int64_t skip(int64_t ntoskip);
93
     /**
94
      * \short Marks the current position in this input stream.
95
      * A subsequent call to the reset method repositions this stream at the
96
      * last marked position so that subsequent reads re-read the same bytes.
97
      *
98
      * The readlimit arguments tells this input stream to allow that many
99
      * bytes to be read before the mark position gets invalidated.
100
      * The stream somehow remembers all the bytes read after the call to mark
101
      * and stands ready to supply those same bytes again if and whenever the
102
      * method reset is called. However, the stream is not required to remember
103
      * any data at all if more than readlimit bytes are read from the stream
104
      * before reset is called.
105
      *
106
      * When calling the method mark more than once at the same position in the
107
      * stream, the call with the largest value for \p readlimit is defining.
108
      **/
109
    virtual int64_t mark(int32_t readlimit) = 0;
110
      /**
94
      /**
111
       * \short Repositions this stream to given requested position.
95
       * @brief Repositions this stream to given requested position.
112
       * The general contract of reset is:
96
       * Reset is guaranteed to work after a successful call to read(),
113
       * - Reset is guaranteed to work after a successfull call to read(),
97
       * when the new position is in the range of the data returned by read().
114
       *   when new position is in the range of the data returned by read().
98
       * This means that @p pos must lie between than the position
115
       *   This means that @p pos must lie between than the position
99
       * corresponding to the @p start parameter (x) of the @r read function
116
       *   corresponding to the @p start parameter (x) of the @r read function
100
       * and the position corresponding to the last position in the returned
117
       *   and the position corresponding to the last position in the returned
101
       * buffer (x + @p nread).
118
       *   buffer (x + @p nread).
119
       * if If the method mark has not been called since the stream was created,
120
       *   or the number of bytes read from the stream since mark was last
121
       *   called is larger than the argument to mark at that last call, then
122
       *   Error is returned.
123
       * - Otherwise the stream is reset to a state such that all the bytes
124
       *   read since the most recent call to mark (or since the start of the
125
       *   file, if mark has not been called) will be resupplied to subsequent
126
       *   callers of the read method, followed by any bytes that otherwise
127
       *   would have been the next input data as of the time of the call to
128
       *   reset.
129
       **/
102
       **/
130
    virtual int64_t reset(int64_t pos) = 0;
103
    virtual int64_t reset(int64_t pos) = 0;
104
    /**
105
     * deprecated function
106
     **/
107
    int64_t mark(int32_t readlimit) {
108
        int64_t p = getPosition();
109
        const T* ptr;
110
        read(ptr, readlimit, -1);
111
        return reset(p);
112
    }
131
};
113
};
132
#define SKIPSTEP 1024
114
#define SKIPSTEP 1024
133
template <class T>
115
template <class T>
(-)src/CLucene/util/bufferedstream_orig.h (-26 / +30 lines)
Lines 1-23 Link Here
1
/**
1
/* This file is part of Strigi Desktop Search
2
 * Copyright 2003-2006 The Apache Software Foundation
3
 *
2
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
3
 * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
4
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Library General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
10
 * This library is distributed in the hope that it will be useful,
11
 * distributed under the License is distributed on an "AS IS" BASIS,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * See the License for the specific language governing permissions and
13
 * Library General Public License for more details.
14
 * limitations under the License.
14
 *
15
 * You should have received a copy of the GNU Library General Public License
16
 * along with this library; see the file COPYING.LIB.  If not, write to
17
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
 * Boston, MA 02110-1301, USA.
15
 */
19
 */
16
#ifndef BUFFEREDSTREAM_H
20
#ifndef BUFFEREDSTREAM_H
17
#define BUFFEREDSTREAM_H
21
#define BUFFEREDSTREAM_H
18
22
19
#include "streambase.h"
23
#include "streambase.h"
20
#include "inputstreambuffer.h"
24
#include "inputstreambuffer.h"
25
#include <cassert>
21
26
22
namespace jstreams {
27
namespace jstreams {
23
28
Lines 41-50 Link Here
41
    virtual int32_t fillBuffer(T* start, int32_t space) = 0;
46
    virtual int32_t fillBuffer(T* start, int32_t space) = 0;
42
    // this function might be useful if you want to reuse a bufferedstream
47
    // this function might be useful if you want to reuse a bufferedstream
43
    void resetBuffer() {printf("implement 'resetBuffer'\n");}
48
    void resetBuffer() {printf("implement 'resetBuffer'\n");}
44
public:
45
    BufferedInputStream<T>();
49
    BufferedInputStream<T>();
50
public:
46
    int32_t read(const T*& start, int32_t min, int32_t max);
51
    int32_t read(const T*& start, int32_t min, int32_t max);
47
    int64_t mark(int32_t readlimit);
48
    int64_t reset(int64_t);
52
    int64_t reset(int64_t);
49
    virtual int64_t skip(int64_t ntoskip);
53
    virtual int64_t skip(int64_t ntoskip);
50
};
54
};
Lines 64-69 Link Here
64
        space = buffer.makeSpace(missing);
68
        space = buffer.makeSpace(missing);
65
        T* start = buffer.readPos + buffer.avail;
69
        T* start = buffer.readPos + buffer.avail;
66
        nwritten = fillBuffer(start, space);
70
        nwritten = fillBuffer(start, space);
71
        assert(StreamBase<T>::status != Eof);
67
        if (nwritten > 0) {
72
        if (nwritten > 0) {
68
            buffer.avail += nwritten;
73
            buffer.avail += nwritten;
69
            missing = ntoread - buffer.avail;
74
            missing = ntoread - buffer.avail;
Lines 84-103 Link Here
84
        // do we have enough space in the buffer?
89
        // do we have enough space in the buffer?
85
        writeToBuffer(min);
90
        writeToBuffer(min);
86
        if (StreamBase<T>::status == Error) return -2;
91
        if (StreamBase<T>::status == Error) return -2;
87
        if (StreamBase<T>::status == Eof) return -1;
88
    }
92
    }
89
93
90
    int32_t nread = buffer.read(start, max);
94
    int32_t nread = buffer.read(start, max);
91
/*    if (nread == 0) {
92
        printf("bis: start %p min %i max %i nread %i avail %i bsize %i pos %lli size %lli\n",
93
        start, min, max, nread, buffer.avail, buffer.size, BufferedInputStream<T>::position, BufferedInputStream<T>::size);
94
        printf("buf: start %p readpos %p marpos %p\n", buffer.start, buffer.readPos, buffer.markPos);
95
    }*/
96
95
97
    BufferedInputStream<T>::position += nread;
96
    BufferedInputStream<T>::position += nread;
98
    if (BufferedInputStream<T>::status == Ok && buffer.avail == 0
97
    if (BufferedInputStream<T>::position > BufferedInputStream<T>::size
98
        && BufferedInputStream<T>::size > 0) {
99
        // error: we read more than was specified in size
100
        // this is an error because all dependent code might have been labouring
101
        // under a misapprehension
102
        BufferedInputStream<T>::status = Error;
103
        BufferedInputStream<T>::error = "Stream is longer than specified.";
104
        nread = -2;
105
    } else if (BufferedInputStream<T>::status == Ok && buffer.avail == 0
99
            && finishedWritingToBuffer) {
106
            && finishedWritingToBuffer) {
100
        BufferedInputStream<T>::status = Eof;
107
        BufferedInputStream<T>::status = Eof;
108
        if (BufferedInputStream<T>::size == -1) {
109
            BufferedInputStream<T>::size = BufferedInputStream<T>::position;
110
        }
101
        // save one call to read() by already returning -1 if no data is there
111
        // save one call to read() by already returning -1 if no data is there
102
        if (nread == 0) nread = -1;
112
        if (nread == 0) nread = -1;
103
    }
113
    }
Lines 105-116 Link Here
105
}
115
}
106
template <class T>
116
template <class T>
107
int64_t
117
int64_t
108
BufferedInputStream<T>::mark(int32_t readlimit) {
109
    buffer.mark(readlimit);
110
    return StreamBase<T>::position;
111
}
112
template <class T>
113
int64_t
114
BufferedInputStream<T>::reset(int64_t newpos) {
118
BufferedInputStream<T>::reset(int64_t newpos) {
115
    if (StreamBase<T>::status == Error) return -2;
119
    if (StreamBase<T>::status == Error) return -2;
116
    // check to see if we have this position
120
    // check to see if we have this position
(-)src/CLucene/util/inputstreambuffer_orig.h (-76 / +17 lines)
Lines 1-17 Link Here
1
/**
1
/* This file is part of Strigi Desktop Search
2
 * Copyright 2003-2006 The Apache Software Foundation
3
 *
2
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
3
 * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
4
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Library General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
10
 * This library is distributed in the hope that it will be useful,
11
 * distributed under the License is distributed on an "AS IS" BASIS,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * See the License for the specific language governing permissions and
13
 * Library General Public License for more details.
14
 * limitations under the License.
14
 *
15
 * You should have received a copy of the GNU Library General Public License
16
 * along with this library; see the file COPYING.LIB.  If not, write to
17
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
 * Boston, MA 02110-1301, USA.
15
 */
19
 */
16
#ifndef INPUTSTREAMBUFFER_H
20
#ifndef INPUTSTREAMBUFFER_H
17
#define INPUTSTREAMBUFFER_H
21
#define INPUTSTREAMBUFFER_H
Lines 28-41 Link Here
28
    int32_t size;
32
    int32_t size;
29
    T* readPos;
33
    T* readPos;
30
    int32_t avail;
34
    int32_t avail;
31
    T* markPos;
32
    int32_t markLimit;
33
35
34
    InputStreamBuffer();
36
    InputStreamBuffer();
35
    ~InputStreamBuffer();
37
    ~InputStreamBuffer();
36
    void setSize(int32_t size);
38
    void setSize(int32_t size);
37
    void mark(int32_t readlimit);
38
    void reset();
39
    int32_t read(const T*& start, int32_t max=0);
39
    int32_t read(const T*& start, int32_t max=0);
40
40
41
    /**
41
    /**
Lines 47-53 Link Here
47
47
48
template <class T>
48
template <class T>
49
InputStreamBuffer<T>::InputStreamBuffer() {
49
InputStreamBuffer<T>::InputStreamBuffer() {
50
    markPos = readPos = start = 0;
50
    readPos = start = 0;
51
    size = avail = 0;
51
    size = avail = 0;
52
}
52
}
53
template <class T>
53
template <class T>
Lines 59-65 Link Here
59
InputStreamBuffer<T>::setSize(int32_t size) {
59
InputStreamBuffer<T>::setSize(int32_t size) {
60
    // store pointer information
60
    // store pointer information
61
    int32_t offset = readPos - start;
61
    int32_t offset = readPos - start;
62
    int32_t markOffset = (markPos) ? markPos - start : -1;
63
62
64
    // allocate memory in the buffer
63
    // allocate memory in the buffer
65
    start = (T*)realloc(start, size*sizeof(T));
64
    start = (T*)realloc(start, size*sizeof(T));
Lines 67-118 Link Here
67
66
68
    // restore pointer information
67
    // restore pointer information
69
    readPos = start + offset;
68
    readPos = start + offset;
70
    markPos = (markOffset == -1) ?0 :start + markOffset;
71
}
72
template <class T>
73
void
74
InputStreamBuffer<T>::mark(int32_t limit) {
75
    // if there's no buffer yet, allocate one now
76
    if (start == 0) {
77
        setSize(limit+1);
78
    }
79
    // if we had a larger limit defined for the same position, do nothing
80
    if (readPos == markPos && limit <= markLimit) {
81
        return;
82
    }
83
            
84
    markLimit = limit;
85
    // if we have enough room, only set the mark
86
    int32_t offset = readPos - start;
87
    if (size - offset >= limit) {
88
        markPos = readPos;
89
        return;
90
    }
91
92
    // if we don't have enough room start by
93
    // moving memory to the start of the buffer
94
    if (readPos != start) {
95
        memmove(start, readPos, avail*sizeof(T));
96
        readPos = start;
97
    }
98
99
    // if we have enough room now, finish
100
    if (size >= limit) {
101
        markPos = readPos;
102
        return;
103
    }
104
105
    // last resort: increase buffer size
106
    setSize(limit+1);
107
    markPos = readPos;
108
}
109
template <class T>
110
void
111
InputStreamBuffer<T>::reset() {
112
    if (markPos != 0) {
113
        avail += readPos - markPos;
114
        readPos = markPos;
115
    }
116
}
69
}
117
template <class T>
70
template <class T>
118
int32_t
71
int32_t
Lines 124-151 Link Here
124
        return space;
77
        return space;
125
    }
78
    }
126
79
127
    if (markPos && readPos - markPos <= markLimit) {
80
    if (avail) {
128
        // move data to the start of the buffer while respecting the set mark
129
        if (markPos != start) {
130
//            printf("moving with mark\n");
131
            int32_t n = avail + readPos - markPos;
132
            memmove(start, markPos, n*sizeof(T));
133
            readPos -= markPos - start;
134
            space += markPos - start;
135
            markPos = start;
136
        }
137
    } else if (avail) {
138
        if (readPos != start) {
81
        if (readPos != start) {
139
//            printf("moving\n");
82
//            printf("moving\n");
140
            // move data to the start of the buffer
83
            // move data to the start of the buffer
141
            memmove(start, readPos, avail*sizeof(T));
84
            memmove(start, readPos, avail*sizeof(T));
142
            space += readPos - start;
85
            space += readPos - start;
143
            readPos = start;
86
            readPos = start;
144
            markPos = 0;
145
        }
87
        }
146
    } else {
88
    } else {
147
        // we may start writing at the start of the buffer
89
        // we may start writing at the start of the buffer
148
        markPos = 0;
149
        readPos = start;
90
        readPos = start;
150
        space = size;
91
        space = size;
151
    }
92
    }

Return to bug 149362