001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.BufferedInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.channels.FileChannel;
026import java.nio.channels.FileChannel.MapMode;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029
030import org.apache.commons.io.build.AbstractOrigin;
031import org.apache.commons.io.build.AbstractStreamBuilder;
032
033/**
034 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
035 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
036 * configurable.
037 * <p>
038 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
039 * kilobytes of data. From the standpoint of performance. it is generally only worth mapping relatively large files into
040 * memory.
041 * </p>
042 * <p>
043 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
044 * use case, the use of buffering may still further improve performance. For example:
045 * </p>
046 * <p>
047 * To build an instance, see {@link Builder}.
048 * </p>
049 * <pre>{@code
050 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
051 *   MemoryMappedFileInputStream.builder()
052 *     .setPath(path)
053 *     .setBufferSize(256 * 1024)
054 *     .get()));}
055 * </pre>
056 * <p>
057 * should outperform:
058 * </p>
059 * <pre>
060 * new GzipInputStream(new MemoryMappedFileInputStream(path))
061 * </pre>
062 * <pre>{@code
063 * GzipInputStream s = new GzipInputStream(
064 *   MemoryMappedFileInputStream.builder()
065 *     .setPath(path)
066 *     .setBufferSize(256 * 1024)
067 *     .get());}
068 * </pre>
069 *
070 * @since 2.12.0
071 */
072public final class MemoryMappedFileInputStream extends InputStream {
073
074    /**
075     * Builds a new {@link MemoryMappedFileInputStream} instance.
076     * <p>
077     * For example:
078     * </p>
079     * <pre>{@code
080     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
081     *   .setPath(path)
082     *   .setBufferSize(256 * 1024)
083     *   .get();}
084     * </pre>
085     *
086     * @since 2.12.0
087     */
088    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
089
090        /**
091         * Constructs a new Builder.
092         */
093        public Builder() {
094            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
095            setBufferSize(DEFAULT_BUFFER_SIZE);
096        }
097
098        /**
099         * Constructs a new instance.
100         * <p>
101         * This builder use the aspects Path and buffer size.
102         * </p>
103         * <p>
104         * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
105         * {@link UnsupportedOperationException}.
106         * </p>
107         *
108         * @return a new instance.
109         * @throws UnsupportedOperationException if the origin cannot provide a Path.
110         * @see AbstractOrigin#getPath()
111         */
112        @Override
113        public MemoryMappedFileInputStream get() throws IOException {
114            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
115        }
116    }
117
118    /**
119     * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
120     * Increasing the value beyond the default size will generally not provide any increase in throughput.
121     */
122    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
123
124    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
125
126    /**
127     * Constructs a new {@link Builder}.
128     *
129     * @return a new {@link Builder}.
130     * @since 2.12.0
131     */
132    public static Builder builder() {
133        return new Builder();
134    }
135
136    private final int bufferSize;
137    private final FileChannel channel;
138    private ByteBuffer buffer = EMPTY_BUFFER;
139    private boolean closed;
140
141    /**
142     * The starting position (within the file) of the next sliding buffer.
143     */
144    private long nextBufferPosition;
145
146    /**
147     * Constructs a new instance.
148     *
149     * @param file The path of the file to open.
150     * @param bufferSize Size of the sliding buffer.
151     * @throws IOException If an I/O error occurs.
152     */
153    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
154        this.bufferSize = bufferSize;
155        this.channel = FileChannel.open(file, StandardOpenOption.READ);
156    }
157
158    @Override
159    public int available() throws IOException {
160        return buffer.remaining();
161    }
162
163    private void cleanBuffer() {
164        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
165            ByteBufferCleaner.clean(buffer);
166        }
167    }
168
169    @Override
170    public void close() throws IOException {
171        if (!closed) {
172            cleanBuffer();
173            buffer = null;
174            channel.close();
175            closed = true;
176        }
177    }
178
179    private void ensureOpen() throws IOException {
180        if (closed) {
181            throw new IOException("Stream closed");
182        }
183    }
184
185    int getBufferSize() {
186        return bufferSize;
187    }
188
189    private void nextBuffer() throws IOException {
190        final long remainingInFile = channel.size() - nextBufferPosition;
191        if (remainingInFile > 0) {
192            final long amountToMap = Math.min(remainingInFile, bufferSize);
193            cleanBuffer();
194            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
195            nextBufferPosition += amountToMap;
196        } else {
197            buffer = EMPTY_BUFFER;
198        }
199    }
200
201    @Override
202    public int read() throws IOException {
203        ensureOpen();
204        if (!buffer.hasRemaining()) {
205            nextBuffer();
206            if (!buffer.hasRemaining()) {
207                return EOF;
208            }
209        }
210        return Short.toUnsignedInt(buffer.get());
211    }
212
213    @Override
214    public int read(final byte[] b, final int off, final int len) throws IOException {
215        ensureOpen();
216        if (!buffer.hasRemaining()) {
217            nextBuffer();
218            if (!buffer.hasRemaining()) {
219                return EOF;
220            }
221        }
222        final int numBytes = Math.min(buffer.remaining(), len);
223        buffer.get(b, off, numBytes);
224        return numBytes;
225    }
226
227    @Override
228    public long skip(final long n) throws IOException {
229        ensureOpen();
230        if (n <= 0) {
231            return 0;
232        }
233        if (n <= buffer.remaining()) {
234            buffer.position((int) (buffer.position() + n));
235            return n;
236        }
237        final long remainingInFile = channel.size() - nextBufferPosition;
238        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
239        nextBufferPosition += skipped - buffer.remaining();
240        nextBuffer();
241        return skipped;
242    }
243
244}