1 2 /* ==================================================================== 3 * The Apache Software License, Version 1.1 4 * 5 * Copyright (c) 2002 The Apache Software Foundation. All rights 6 * reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * 3. The end-user documentation included with the redistribution, 21 * if any, must include the following acknowledgment: 22 * "This product includes software developed by the 23 * Apache Software Foundation (http://www.apache.org/)." 24 * Alternately, this acknowledgment may appear in the software itself, 25 * if and wherever such third-party acknowledgments normally appear. 26 * 27 * 4. The names "Apache" and "Apache Software Foundation" and 28 * "Apache POI" must not be used to endorse or promote products 29 * derived from this software without prior written permission. For 30 * written permission, please contact apache@apache.org. 31 * 32 * 5. Products derived from this software may not be called "Apache", 33 * "Apache POI", nor may "Apache" appear in their name, without 34 * prior written permission of the Apache Software Foundation. 35 * 36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 47 * SUCH DAMAGE. 48 * ==================================================================== 49 * 50 * This software consists of voluntary contributions made by many 51 * individuals on behalf of the Apache Software Foundation. For more 52 * information on the Apache Software Foundation, please see 53 * <http://www.apache.org/>. 54 */ 55 56 package org.apache.poi.poifs.filesystem; 57 58 import java.io.*; 59 60 /** 61 * This class provides methods to read a DocumentEntry managed by a 62 * Filesystem instance. 63 * 64 * @author Marc Johnson (mjohnson at apache dot org) 65 */ 66 67 public class DocumentInputStream 68 extends InputStream 69 { 70 71 // current offset into the Document 72 private int _current_offset; 73 74 // current marked offset into the Document (used by mark and 75 // reset) 76 private int _marked_offset; 77 78 // the Document's size 79 private int _document_size; 80 81 // have we been closed? 82 private boolean _closed; 83 84 // the actual Document 85 private POIFSDocument _document; 86 87 // buffer used to read one byte at a time 88 private byte[] _tiny_buffer; 89 90 // returned by read operations if we're at end of document 91 static private final int EOD = -1; 92 93 /** 94 * Create an InputStream from the specified DocumentEntry 95 * 96 * @param document the DocumentEntry to be read 97 * 98 * @exception IOException if the DocumentEntry cannot be opened 99 * (like, maybe it has been deleted?) 100 */ 101 102 public DocumentInputStream(final DocumentEntry document) 103 throws IOException 104 { 105 _current_offset = 0; 106 _marked_offset = 0; 107 _document_size = document.getSize(); 108 _closed = false; 109 _tiny_buffer = null; 110 if (document instanceof DocumentNode) 111 { 112 _document = (( DocumentNode ) document).getDocument(); 113 } 114 else 115 { 116 throw new IOException("Cannot open internal document storage"); 117 } 118 } 119 120 /** 121 * Create an InputStream from the specified Document 122 * 123 * @param document the Document to be read 124 * 125 * @exception IOException if the DocumentEntry cannot be opened 126 * (like, maybe it has been deleted?) 127 */ 128 129 public DocumentInputStream(final POIFSDocument document) 130 throws IOException 131 { 132 _current_offset = 0; 133 _marked_offset = 0; 134 _document_size = document.getSize(); 135 _closed = false; 136 _tiny_buffer = null; 137 _document = document; 138 } 139 140 /** 141 * Returns the number of bytes that can be read (or skipped over) 142 * from this input stream without blocking by the next caller of a 143 * method for this input stream. The next caller might be the same 144 * thread or or another thread. 145 * 146 * @return the number of bytes that can be read from this input 147 * stream without blocking. 148 * 149 * @exception IOException on error (such as the stream has been 150 * closed) 151 */ 152 153 public int available() 154 throws IOException 155 { 156 dieIfClosed(); 157 return _document_size - _current_offset; 158 } 159 160 /** 161 * Closes this input stream and releases any system resources 162 * associated with the stream. 163 * 164 * @exception IOException 165 */ 166 167 public void close() 168 throws IOException 169 { 170 _closed = true; 171 } 172 173 /** 174 * Marks the current position in this input stream. A subsequent 175 * call to the reset method repositions this stream at the last 176 * marked position so that subsequent reads re-read the same 177 * bytes. 178 * <p> 179 * The readlimit arguments tells this input stream to allow that 180 * many bytes to be read before the mark position gets 181 * invalidated. This implementation, however, does not care. 182 * <p> 183 * The general contract of mark is that, if the method 184 * markSupported returns true, the stream somehow remembers all 185 * the bytes read after the call to mark and stands ready to 186 * supply those same bytes again if and whenever the method reset 187 * is called. However, the stream is not required to remember any 188 * data at all if more than readlimit bytes are read from the 189 * stream before reset is called. But this stream will. 190 * 191 * @param ignoredReadlimit the maximum limit of bytes that can be 192 * read before the mark position becomes 193 * invalid. Ignored by this 194 * implementation. 195 */ 196 197 public void mark(int ignoredReadlimit) 198 { 199 _marked_offset = _current_offset; 200 } 201 202 /** 203 * Tests if this input stream supports the mark and reset methods. 204 * 205 * @return true 206 */ 207 208 public boolean markSupported() 209 { 210 return true; 211 } 212 213 /** 214 * Reads the next byte of data from the input stream. The value 215 * byte is returned as an int in the range 0 to 255. If no byte is 216 * available because the end of the stream has been reached, the 217 * value -1 is returned. The definition of this method in 218 * java.io.InputStream allows this method to block, but it won't. 219 * 220 * @return the next byte of data, or -1 if the end of the stream 221 * is reached. 222 * 223 * @exception IOException 224 */ 225 226 public int read() 227 throws IOException 228 { 229 dieIfClosed(); 230 if (atEOD()) 231 { 232 return EOD; 233 } 234 if (_tiny_buffer == null) 235 { 236 _tiny_buffer = new byte[ 1 ]; 237 } 238 _document.read(_tiny_buffer, _current_offset++); 239 return ((int)_tiny_buffer[ 0 ]) & 0x000000FF; 240 } 241 242 /** 243 * Reads some number of bytes from the input stream and stores 244 * them into the buffer array b. The number of bytes actually read 245 * is returned as an integer. The definition of this method in 246 * java.io.InputStream allows this method to block, but it won't. 247 * <p> 248 * If b is null, a NullPointerException is thrown. If the length 249 * of b is zero, then no bytes are read and 0 is returned; 250 * otherwise, there is an attempt to read at least one byte. If no 251 * byte is available because the stream is at end of file, the 252 * value -1 is returned; otherwise, at least one byte is read and 253 * stored into b. 254 * <p> 255 * The first byte read is stored into element b[0], the next one 256 * into b[1], and so on. The number of bytes read is, at most, 257 * equal to the length of b. Let k be the number of bytes actually 258 * read; these bytes will be stored in elements b[0] through 259 * b[k-1], leaving elements b[k] through b[b.length-1] unaffected. 260 * <p> 261 * If the first byte cannot be read for any reason other than end 262 * of file, then an IOException is thrown. In particular, an 263 * IOException is thrown if the input stream has been closed. 264 * <p> 265 * The read(b) method for class InputStream has the same effect as: 266 * <p> 267 * <code>read(b, 0, b.length)</code> 268 * 269 * @param b the buffer into which the data is read. 270 * 271 * @return the total number of bytes read into the buffer, or -1 272 * if there is no more data because the end of the stream 273 * has been reached. 274 * 275 * @exception IOException 276 * @exception NullPointerException 277 */ 278 279 public int read(final byte [] b) 280 throws IOException, NullPointerException 281 { 282 return read(b, 0, b.length); 283 } 284 285 /** 286 * Reads up to len bytes of data from the input stream into an 287 * array of bytes. An attempt is made to read as many as len 288 * bytes, but a smaller number may be read, possibly zero. The 289 * number of bytes actually read is returned as an integer. 290 * <p> 291 * The definition of this method in java.io.InputStream allows it 292 * to block, but it won't. 293 * <p> 294 * If b is null, a NullPointerException is thrown. 295 * <p> 296 * If off is negative, or len is negative, or off+len is greater 297 * than the length of the array b, then an 298 * IndexOutOfBoundsException is thrown. 299 * <p> 300 * If len is zero, then no bytes are read and 0 is returned; 301 * otherwise, there is an attempt to read at least one byte. If no 302 * byte is available because the stream is at end of file, the 303 * value -1 is returned; otherwise, at least one byte is read and 304 * stored into b. 305 * <p> 306 * The first byte read is stored into element b[off], the next one 307 * into b[off+1], and so on. The number of bytes read is, at most, 308 * equal to len. Let k be the number of bytes actually read; these 309 * bytes will be stored in elements b[off] through b[off+k-1], 310 * leaving elements b[off+k] through b[off+len-1] unaffected. 311 * <p> 312 * In every case, elements b[0] through b[off] and elements 313 * b[off+len] through b[b.length-1] are unaffected. 314 * <p> 315 * If the first byte cannot be read for any reason other than end 316 * of file, then an IOException is thrown. In particular, an 317 * IOException is thrown if the input stream has been closed. 318 * 319 * @param b the buffer into which the data is read. 320 * @param off the start offset in array b at which the data is 321 * written. 322 * @param len the maximum number of bytes to read. 323 * 324 * @return the total number of bytes read into the buffer, or -1 325 * if there is no more data because the end of the stream 326 * has been reached. 327 * 328 * @exception IOException 329 * @exception NullPointerException 330 * @exception IndexOutOfBoundsException 331 */ 332 333 public int read(final byte [] b, final int off, final int len) 334 throws IOException, NullPointerException, IndexOutOfBoundsException 335 { 336 dieIfClosed(); 337 if (b == null) 338 { 339 throw new NullPointerException("buffer is null"); 340 } 341 if ((off < 0) || (len < 0) || (b.length < (off + len))) 342 { 343 throw new IndexOutOfBoundsException( 344 "can't read past buffer boundaries"); 345 } 346 if (len == 0) 347 { 348 return 0; 349 } 350 if (atEOD()) 351 { 352 return EOD; 353 } 354 int limit = Math.min(available(), len); 355 356 if ((off == 0) && (limit == b.length)) 357 { 358 _document.read(b, _current_offset); 359 } 360 else 361 { 362 byte[] buffer = new byte[ limit ]; 363 364 _document.read(buffer, _current_offset); 365 System.arraycopy(buffer, 0, b, off, limit); 366 } 367 _current_offset += limit; 368 return limit; 369 } 370 371 /** 372 * Repositions this stream to the position at the time the mark 373 * method was last called on this input stream. 374 * <p> 375 * The general contract of reset is: 376 * <p> 377 * <ul> 378 * <li> 379 * If the method markSupported returns true, then: 380 * <ul> 381 * <li> 382 * If the method mark has not been called since the 383 * stream was created, or the number of bytes read 384 * from the stream since mark was last called is 385 * larger than the argument to mark at that last 386 * call, then an IOException might be thrown. 387 * </li> 388 * <li> 389 * If such an IOException is not thrown, then the 390 * stream is reset to a state such that all the 391 * bytes read since the most recent call to mark 392 * (or since the start of the file, if mark has not 393 * been called) will be resupplied to subsequent 394 * callers of the read method, followed by any 395 * bytes that otherwise would have been the next 396 * input data as of the time of the call to reset. 397 * </li> 398 * </ul> 399 * </li> 400 * <li> 401 * If the method markSupported returns false, then: 402 * <ul> 403 * <li> 404 * The call to reset may throw an IOException. 405 * </li> 406 * <li> 407 * If an IOException is not thrown, then the 408 * stream is reset to a fixed state that depends 409 * on the particular type of the input and how it 410 * was created. The bytes that will be supplied to 411 * subsequent callers of the read method depend on 412 * the particular type of the input stream. 413 * </li> 414 * </ul> 415 * </li> 416 * </ul> 417 * <p> 418 * All well and good ... this class's markSupported method returns 419 * true and this method does not care whether you've called mark 420 * at all, or whether you've exceeded the number of bytes 421 * specified in the last call to mark. We're basically walking a 422 * byte array ... mark and reset to your heart's content. 423 */ 424 425 public void reset() 426 { 427 _current_offset = _marked_offset; 428 } 429 430 /** 431 * Skips over and discards n bytes of data from this input 432 * stream. The skip method may, for a variety of reasons, end up 433 * skipping over some smaller number of bytes, possibly 0. This 434 * may result from any of a number of conditions; reaching end of 435 * file before n bytes have been skipped is only one 436 * possibility. The actual number of bytes skipped is returned. If 437 * n is negative, no bytes are skipped. 438 * 439 * @param n the number of bytes to be skipped. 440 * 441 * @return the actual number of bytes skipped. 442 * 443 * @exception IOException 444 */ 445 446 public long skip(final long n) 447 throws IOException 448 { 449 dieIfClosed(); 450 if (n < 0) 451 { 452 return 0; 453 } 454 int new_offset = _current_offset + ( int ) n; 455 456 if (new_offset < _current_offset) 457 { 458 459 // wrap around in converting a VERY large long to an int 460 new_offset = _document_size; 461 } 462 else if (new_offset > _document_size) 463 { 464 new_offset = _document_size; 465 } 466 long rval = new_offset - _current_offset; 467 468 _current_offset = new_offset; 469 return rval; 470 } 471 472 private void dieIfClosed() 473 throws IOException 474 { 475 if (_closed) 476 { 477 throw new IOException( 478 "cannot perform requested operation on a closed stream"); 479 } 480 } 481 482 private boolean atEOD() 483 { 484 return _current_offset == _document_size; 485 } 486 } // end public class DocumentInputStream 487 488