001    /* BufferedReader.java
002       Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
003         Free Software Foundation, Inc.
004    
005    This file is part of GNU Classpath.
006    
007    GNU Classpath is free software; you can redistribute it and/or modify
008    it under the terms of the GNU General Public License as published by
009    the Free Software Foundation; either version 2, or (at your option)
010    any later version.
011     
012    GNU Classpath is distributed in the hope that it will be useful, but
013    WITHOUT ANY WARRANTY; without even the implied warranty of
014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015    General Public License for more details.
016    
017    You should have received a copy of the GNU General Public License
018    along with GNU Classpath; see the file COPYING.  If not, write to the
019    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
020    02110-1301 USA.
021    
022    Linking this library statically or dynamically with other modules is
023    making a combined work based on this library.  Thus, the terms and
024    conditions of the GNU General Public License cover the whole
025    combination.
026    
027    As a special exception, the copyright holders of this library give you
028    permission to link this library with independent modules to produce an
029    executable, regardless of the license terms of these independent
030    modules, and to copy and distribute the resulting executable under
031    terms of your choice, provided that you also meet, for each linked
032    independent module, the terms and conditions of the license of that
033    module.  An independent module is a module which is not derived from
034    or based on this library.  If you modify this library, you may extend
035    this exception to your version of the library, but you are not
036    obligated to do so.  If you do not wish to do so, delete this
037    exception statement from your version. */
038    
039    
040    package java.io;
041    
042    import gnu.java.lang.CPStringBuilder;
043    
044    /* Written using "Java Class Libraries", 2nd edition, plus online
045     * API docs for JDK 1.2 beta from http://www.javasoft.com.
046     * Status:  Believed complete and correct.
047     */
048    
049    /**
050     * This subclass of <code>FilterReader</code> buffers input from an 
051     * underlying implementation to provide a possibly more efficient read
052     * mechanism.  It maintains the buffer and buffer state in instance 
053     * variables that are available to subclasses.  The default buffer size
054     * of 8192 chars can be overridden by the creator of the stream.
055     * <p>
056     * This class also implements mark/reset functionality.  It is capable
057     * of remembering any number of input chars, to the limits of
058     * system memory or the size of <code>Integer.MAX_VALUE</code>
059     *
060     * @author Per Bothner (bothner@cygnus.com)
061     * @author Aaron M. Renn (arenn@urbanophile.com)
062     */
063    public class BufferedReader extends Reader
064    {
065      Reader in;
066      char[] buffer;
067      /* Index of current read position.  Must be >= 0 and <= limit. */
068      /* There is a special case where pos may be equal to limit+1; this
069       * is used as an indicator that a readLine was done with a '\r' was
070       * the very last char in the buffer.  Since we don't want to read-ahead
071       * and potentially block, we set pos this way to indicate the situation
072       * and deal with it later.  Doing it this way rather than having a
073       * separate boolean field to indicate the condition has the advantage
074       * that it is self-clearing on things like mark/reset.
075       */
076      int pos;
077      /* Limit of valid data in buffer.  Must be >= pos and <= buffer.length. */
078      /* This can be < pos in the one special case described above. */
079      int limit;
080    
081      /* The value -1 means there is no mark, or the mark has been invalidated.
082         Otherwise, markPos is the index in the buffer of the marked position.
083         Must be >= 0 and <= pos.
084         Note we do not explicitly store the read-limit.
085         The implicit read-limit is (buffer.length - markPos), which is
086         guaranteed to be >= the read-limit requested in the call to mark. */
087      int markPos = -1;
088    
089      // The JCL book specifies the default buffer size as 8K characters.
090      // This is package-private because it is used by LineNumberReader.
091      static final int DEFAULT_BUFFER_SIZE = 8192;
092    
093      /**
094        * Create a new <code>BufferedReader</code> that will read from the 
095        * specified subordinate stream with a default buffer size of 8192 chars.
096        *
097        * @param in The subordinate stream to read from
098        */
099      public BufferedReader(Reader in)
100      {
101        this(in, DEFAULT_BUFFER_SIZE);
102      }
103    
104      /**
105       * Create a new <code>BufferedReader</code> that will read from the 
106       * specified subordinate stream with a buffer size that is specified by the 
107       * caller.
108       *
109       * @param in The subordinate stream to read from
110       * @param size The buffer size to use
111       *
112       * @exception IllegalArgumentException if size &lt;= 0
113       */
114      public BufferedReader(Reader in, int size)
115      {
116        super(in.lock);
117        if (size <= 0)
118          throw new IllegalArgumentException("Illegal buffer size: " + size);
119        this.in = in;
120        buffer = new char[size];
121      }
122    
123      /**
124       * This method closes the underlying stream and frees any associated
125       * resources.
126       *
127       * @exception IOException If an error occurs
128       */
129      public void close() throws IOException
130      {
131        synchronized (lock)
132          {
133            if (in != null)
134              in.close();
135            in = null;
136            buffer = null;
137          }
138      }
139    
140      /**
141       * Returns <code>true</code> to indicate that this class supports mark/reset 
142       * functionality.
143       *
144       * @return <code>true</code>
145       */
146      public boolean markSupported()
147      {
148        return true;
149      }
150    
151      /**
152       * Mark a position in the input to which the stream can be
153       * "reset" by calling the <code>reset()</code> method.  The parameter
154       * <code>readLimit</code> is the number of chars that can be read from the 
155       * stream after setting the mark before the mark becomes invalid.  For
156       * example, if <code>mark()</code> is called with a read limit of 10, then 
157       * when 11 chars of data are read from the stream before the 
158       * <code>reset()</code> method is called, then the mark is invalid and the 
159       * stream object instance is not required to remember the mark.
160       * <p>
161       * Note that the number of chars that can be remembered by this method
162       * can be greater than the size of the internal read buffer.  It is also
163       * not dependent on the subordinate stream supporting mark/reset
164       * functionality.
165       *
166       * @param readLimit The number of chars that can be read before the mark 
167       *        becomes invalid
168       *
169       * @exception IOException If an error occurs
170       * @exception IllegalArgumentException if readLimit is negative.
171       */
172      public void mark(int readLimit) throws IOException
173      {
174        if (readLimit < 0)
175          throw new IllegalArgumentException("Read-ahead limit is negative");
176    
177        synchronized (lock)
178          {
179            checkStatus();
180            // In this method we need to be aware of the special case where
181            // pos + 1 == limit.  This indicates that a '\r' was the last char
182            // in the buffer during a readLine.  We'll want to maintain that
183            // condition after we shift things around and if a larger buffer is
184            // needed to track readLimit, we'll have to make it one element
185            // larger to ensure we don't invalidate the mark too early, if the
186            // char following the '\r' is NOT a '\n'.  This is ok because, per
187            // the spec, we are not required to invalidate when passing readLimit.
188            //
189            // Note that if 'pos > limit', then doing 'limit -= pos' will cause
190            // limit to be negative.  This is the only way limit will be < 0.
191    
192            if (pos + readLimit > limit)
193              {
194                char[] old_buffer = buffer;
195                int extraBuffSpace = 0;
196                if (pos > limit)
197                  extraBuffSpace = 1;
198                if (readLimit + extraBuffSpace > limit)
199                  buffer = new char[readLimit + extraBuffSpace];
200                limit -= pos;
201                if (limit >= 0)
202                  {
203                    System.arraycopy(old_buffer, pos, buffer, 0, limit);
204                    pos = 0;
205                  }
206              }
207    
208            if (limit < 0)
209              {
210                // Maintain the relationship of 'pos > limit'.
211                pos = 1;
212                limit = markPos = 0;
213              }
214            else
215              markPos = pos;
216            // Now pos + readLimit <= buffer.length. thus if we need to read
217            // beyond buffer.length, then we are allowed to invalidate markPos.
218          }
219      }
220    
221      /**
222       * Reset the stream to the point where the <code>mark()</code> method
223       * was called.  Any chars that were read after the mark point was set will
224       * be re-read during subsequent reads.
225       * <p>
226       * This method will throw an IOException if the number of chars read from
227       * the stream since the call to <code>mark()</code> exceeds the mark limit
228       * passed when establishing the mark.
229       *
230       * @exception IOException If an error occurs;
231       */
232      public void reset() throws IOException
233      {
234        synchronized (lock)
235          {
236            checkStatus();
237            if (markPos < 0)
238              throw new IOException("mark never set or invalidated");
239    
240            // Need to handle the extremely unlikely case where a readLine was
241            // done with a '\r' as the last char in the buffer; which was then
242            // immediately followed by a mark and a reset with NO intervening
243            // read of any sort.  In that case, setting pos to markPos would
244            // lose that info and a subsequent read would thus not skip a '\n'
245            // (if one exists).  The value of limit in this rare case is zero.
246            // We can assume that if limit is zero for other reasons, then
247            // pos is already set to zero and doesn't need to be readjusted.
248            if (limit > 0)
249              pos = markPos;
250          }
251      }
252    
253      /**
254       * This method determines whether or not a stream is ready to be read.  If
255       * this method returns <code>false</code> then this stream could (but is
256       * not guaranteed to) block on the next read attempt.
257       *
258       * @return <code>true</code> if this stream is ready to be read, 
259       * <code>false</code> otherwise
260       *
261       * @exception IOException If an error occurs
262       */
263      public boolean ready() throws IOException
264      {
265        synchronized (lock)
266          {
267            checkStatus();
268            return pos < limit || in.ready();
269          }
270      }
271    
272      /**
273       * This method read chars from a stream and stores them into a caller
274       * supplied buffer.  It starts storing the data at index 
275       * <code>offset</code> into
276       * the buffer and attempts to read <code>len</code> chars.  This method can
277       * return before reading the number of chars requested.  The actual number
278       * of chars read is returned as an int.  A -1 is returned to indicate the
279       * end of the stream.
280       * <p>
281       * This method will block until some data can be read.
282       *
283       * @param buf The array into which the chars read should be stored
284       * @param offset The offset into the array to start storing chars
285       * @param count The requested number of chars to read
286       *
287       * @return The actual number of chars read, or -1 if end of stream.
288       *
289       * @exception IOException If an error occurs.
290       * @exception IndexOutOfBoundsException If offset and count are not
291       * valid regarding buf.
292       */
293      public int read(char[] buf, int offset, int count) throws IOException
294      {
295        if (offset < 0 || offset + count > buf.length || count < 0)
296          throw new IndexOutOfBoundsException();
297    
298        synchronized (lock)
299          {
300            checkStatus();
301            // Once again, we need to handle the special case of a readLine
302            // that has a '\r' at the end of the buffer.  In this case, we'll
303            // need to skip a '\n' if it is the next char to be read.
304            // This special case is indicated by 'pos > limit'.
305            boolean retAtEndOfBuffer = false;
306    
307            int avail = limit - pos;
308            if (count > avail)
309              {
310                if (avail > 0)
311                  count = avail;
312                else // pos >= limit
313                  {
314                    if (limit == buffer.length)
315                      markPos = -1; // read too far - invalidate the mark.
316                    if (pos > limit)
317                      {
318                        // Set a boolean and make pos == limit to simplify things.
319                        retAtEndOfBuffer = true;
320                        --pos;
321                      }
322                    if (markPos < 0)
323                      {
324                        // Optimization:  can read directly into buf.
325                        if (count >= buffer.length && !retAtEndOfBuffer)
326                          return in.read(buf, offset, count);
327                        pos = limit = 0;
328                      }
329                    avail = in.read(buffer, limit, buffer.length - limit);
330                    if (retAtEndOfBuffer && avail > 0 && buffer[limit] == '\n')
331                      {
332                        --avail;
333                        limit++;
334                      }
335                    if (avail < count)
336                      {
337                        if (avail <= 0)
338                          return avail;
339                        count = avail;
340                      }
341                    limit += avail;
342                  }
343              }
344            System.arraycopy(buffer, pos, buf, offset, count);
345            pos += count;
346            return count;
347          }
348      }
349    
350      /* Read more data into the buffer.  Update pos and limit appropriately.
351         Assumes pos==limit initially.  May invalidate the mark if read too much.
352         Return number of chars read (never 0), or -1 on eof. */
353      private int fill() throws IOException
354      {
355        checkStatus();
356        // Handle the special case of a readLine that has a '\r' at the end of
357        // the buffer.  In this case, we'll need to skip a '\n' if it is the
358        // next char to be read.  This special case is indicated by 'pos > limit'.
359        boolean retAtEndOfBuffer = false;
360        if (pos > limit)
361          {
362            retAtEndOfBuffer = true;
363            --pos;
364          }
365    
366        if (markPos >= 0 && limit == buffer.length)
367          markPos = -1;
368        if (markPos < 0)
369          pos = limit = 0;
370        int count = in.read(buffer, limit, buffer.length - limit);
371        if (count > 0)
372          limit += count;
373    
374        if (retAtEndOfBuffer && buffer[pos] == '\n')
375          {
376            --count;
377            // If the mark was set to the location of the \n, then we
378            // must change it to fully pretend that the \n does not
379            // exist.
380            if (markPos == pos)
381              ++markPos;
382            ++pos;
383          }
384    
385        return count;
386      }
387      
388      public int read() throws IOException
389      {
390        synchronized (lock)
391          {
392            checkStatus();
393            if (pos >= limit && fill () <= 0)
394              return -1;
395            return buffer[pos++];
396          }
397      }
398    
399      /* Return the end of the line starting at this.pos and ending at limit.
400       * The index returns is *before* any line terminators, or limit
401       * if no line terminators were found.
402       */
403      private int lineEnd(int limit)
404      {
405        int i = pos;
406        for (; i < limit; i++)
407          {
408            char ch = buffer[i];
409            if (ch == '\n' || ch == '\r')
410              break;
411          }
412        return i;
413      }
414    
415      /**
416       * This method reads a single line of text from the input stream, returning
417       * it as a <code>String</code>.  A line is terminated by "\n", a "\r", or
418       * an "\r\n" sequence.  The system dependent line separator is not used.
419       * The line termination characters are not returned in the resulting
420       * <code>String</code>.
421       * 
422       * @return The line of text read, or <code>null</code> if end of stream.
423       * 
424       * @exception IOException If an error occurs
425       */
426      public String readLine() throws IOException
427      {
428        checkStatus();
429        // Handle the special case where a previous readLine (with no intervening
430        // reads/skips) had a '\r' at the end of the buffer.
431        // In this case, we'll need to skip a '\n' if it's the next char to be read.
432        // This special case is indicated by 'pos > limit'.
433        if (pos > limit)
434          {
435            int ch = read();
436            if (ch < 0)
437              return null;
438            if (ch != '\n')
439              --pos;
440          }
441        int i = lineEnd(limit);
442        if (i < limit)
443          {
444            String str = String.valueOf(buffer, pos, i - pos);
445            pos = i + 1;
446            // If the last char in the buffer is a '\r', we must remember
447            // to check if the next char to be read after the buffer is refilled
448            // is a '\n'.  If so, skip it.  To indicate this condition, we set pos
449            // to be limit + 1, which normally is never possible.
450            if (buffer[i] == '\r')
451              if (pos == limit || buffer[pos] == '\n')
452                pos++;
453            return str;
454          }
455        CPStringBuilder sbuf = new CPStringBuilder(200);
456        sbuf.append(buffer, pos, i - pos);
457        pos = i;
458        // We only want to return null when no characters were read before
459        // EOF.  So we must keep track of this separately.  Otherwise we
460        // would treat an empty `sbuf' as an EOF condition, which is wrong
461        // when there is just a newline.
462        boolean eof = false;
463        for (;;)
464          {
465            // readLine should block. So we must not return until a -1 is reached.
466            if (pos >= limit)
467              {
468                // here count == 0 isn't sufficient to give a failure.
469                int count = fill();
470                if (count < 0)
471                  {
472                    eof = true;
473                    break;
474                  }
475                continue;
476              }
477            int ch = buffer[pos++];
478            if (ch == '\n' || ch == '\r')
479              {
480                // Check here if a '\r' was the last char in the buffer; if so,
481                // mark it as in the comment above to indicate future reads
482                // should skip a newline that is the next char read after
483                // refilling the buffer.
484                if (ch == '\r')
485                  if (pos == limit || buffer[pos] == '\n')
486                    pos++;
487                break;
488              }
489            i = lineEnd(limit);
490            sbuf.append(buffer, pos - 1, i - (pos - 1));
491            pos = i;
492          }
493        return (sbuf.length() == 0 && eof) ? null : sbuf.toString();
494      }
495    
496      /**
497       * This method skips the specified number of chars in the stream.  It
498       * returns the actual number of chars skipped, which may be less than the
499       * requested amount.
500       * <p>
501       * This method first discards chars in the buffer, then calls the
502       * <code>skip</code> method on the underlying stream to skip the 
503       * remaining chars.
504       *
505       * @param count The requested number of chars to skip
506       *
507       * @return The actual number of chars skipped.
508       *
509       * @exception IOException If an error occurs.
510       * @exception IllegalArgumentException If count is negative.
511       */
512      public long skip(long count) throws IOException
513      {
514        synchronized (lock)
515          {
516            checkStatus();
517            if (count < 0)
518              throw new IllegalArgumentException("skip value is negative");
519            if (count == 0)
520              return 0;
521            // Yet again, we need to handle the special case of a readLine
522            // that has a '\r' at the end of the buffer.  In this case, we need
523            // to ignore a '\n' if it is the next char to be read.
524            // This special case is indicated by 'pos > limit' (i.e. avail < 0).
525            // To simplify things, if we're dealing with the special case for
526            // readLine, just read the next char (since the fill method will
527            // skip the '\n' for us).  By doing this, we'll have to back up pos.
528            // That's easier than trying to keep track of whether we've skipped
529            // one element or not.
530            if (pos > limit)
531              {
532                if (read() < 0)
533                  return 0;
534                else
535                  --pos; 
536              }
537    
538            int avail = limit - pos;
539    
540            if (count < avail)
541              {
542                pos += count;
543                return count;
544              }
545    
546            pos = limit;
547            long todo = count - avail;
548            if (todo > buffer.length)
549              {
550                markPos = -1;
551                todo -= in.skip(todo);
552              }
553            else
554              {
555                while (todo > 0)
556                  {
557                    avail = fill();
558                    if (avail <= 0)
559                      break;
560                    if (avail > todo)
561                      avail = (int) todo;
562                    pos += avail;
563                    todo -= avail;
564                  }
565              }
566            return count - todo;
567          }
568      }
569      
570      private void checkStatus() throws IOException
571      {
572        if (in == null)
573          throw new IOException("Stream closed");
574      }  
575    }