/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.List;
035
036import org.apache.commons.io.Charsets;
037import org.apache.commons.io.FileSystem;
038import org.apache.commons.io.StandardLineSeparator;
039import org.apache.commons.io.build.AbstractOrigin;
040import org.apache.commons.io.build.AbstractStreamBuilder;
041
/**
 * Reads the lines of a file in reverse order (similar to a BufferedReader, but starting at the last line). Useful, for example, for searching in log
 * files.
 * <p>
 * To build an instance, see {@link Builder}.
 * </p>
 *
 * @since 2.2
 */
050public class ReversedLinesFileReader implements Closeable {
051
052    /**
053     * Builds a new {@link ReversedLinesFileReader} instance.
054     * <p>
055     * For example:
056     * </p>
057     * <pre>{@code
058     * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
059     *   .setPath(path)
060     *   .setBufferSize(4096)
061     *   .setCharset(StandardCharsets.UTF_8)
062     *   .get();}
063     * </pre>
064     *
065     * @since 2.12.0
066     */
067    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
068
069        /**
070         * Constructs a new Builder.
071         */
072        public Builder() {
073            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
074            setBufferSize(DEFAULT_BLOCK_SIZE);
075        }
076
077        /**
078         * Constructs a new instance.
079         * <p>
080         * This builder use the aspects Path, Charset, buffer size.
081         * </p>
082         * <p>
083         * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
084         * {@link UnsupportedOperationException}.
085         * </p>
086         *
087         * @return a new instance.
088         * @throws UnsupportedOperationException if the origin cannot provide a Path.
089         * @see AbstractOrigin#getPath()
090         */
091        @Override
092        public ReversedLinesFileReader get() throws IOException {
093            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
094        }
095
096    }
097
098    private final class FilePart {
099        private final long no;
100
101        private final byte[] data;
102
103        private byte[] leftOver;
104
105        private int currentLastBytePos;
106
107        /**
108         * Constructs a new instance.
109         *
110         * @param no                     the part number
111         * @param length                 its length
112         * @param leftOverOfLastFilePart remainder
113         * @throws IOException if there is a problem reading the file
114         */
115        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
116            this.no = no;
117            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
118            this.data = new byte[dataLength];
119            final long off = (no - 1) * blockSize;
120
121            // read data
122            if (no > 0 /* file not empty */) {
123                channel.position(off);
124                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
125                if (countRead != length) {
126                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
127                }
128            }
129            // copy left over part into data arr
130            if (leftOverOfLastFilePart != null) {
131                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
132            }
133            this.currentLastBytePos = data.length - 1;
134            this.leftOver = null;
135        }
136
137        /**
138         * Constructs the buffer containing any leftover bytes.
139         */
140        private void createLeftOver() {
141            final int lineLengthBytes = currentLastBytePos + 1;
142            if (lineLengthBytes > 0) {
143                // create left over for next block
144                leftOver = Arrays.copyOf(data, lineLengthBytes);
145            } else {
146                leftOver = null;
147            }
148            currentLastBytePos = -1;
149        }
150
151        /**
152         * Finds the new-line sequence and return its length.
153         *
154         * @param data buffer to scan
155         * @param i    start offset in buffer
156         * @return length of newline sequence or 0 if none found
157         */
158        private int getNewLineMatchByteCount(final byte[] data, final int i) {
159            for (final byte[] newLineSequence : newLineSequences) {
160                boolean match = true;
161                for (int j = newLineSequence.length - 1; j >= 0; j--) {
162                    final int k = i + j - (newLineSequence.length - 1);
163                    match &= k >= 0 && data[k] == newLineSequence[j];
164                }
165                if (match) {
166                    return newLineSequence.length;
167                }
168            }
169            return 0;
170        }
171
172        /**
173         * Reads a line.
174         *
175         * @return the line or null
176         */
177        private String readLine() { //NOPMD Bug in PMD
178
179            String line = null;
180            int newLineMatchByteCount;
181
182            final boolean isLastFilePart = no == 1;
183
184            int i = currentLastBytePos;
185            while (i > -1) {
186
187                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
188                    // avoidNewlineSplitBuffer: for all except the last file part we
189                    // take a few bytes to the next file part to avoid splitting of newlines
190                    createLeftOver();
191                    break; // skip last few bytes and leave it to the next file part
192                }
193
194                // --- check for newline ---
195                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
196                    final int lineStart = i + 1;
197                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
198
199                    if (lineLengthBytes < 0) {
200                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
201                    }
202                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
203
204                    line = new String(lineData, charset);
205
206                    currentLastBytePos = i - newLineMatchByteCount;
207                    break; // found line
208                }
209
210                // --- move cursor ---
211                i -= byteDecrement;
212
213                // --- end of file part handling ---
214                if (i < 0) {
215                    createLeftOver();
216                    break; // end of file part
217                }
218            }
219
220            // --- last file part handling ---
221            if (isLastFilePart && leftOver != null) {
222                // there will be no line break anymore, this is the first line of the file
223                line = new String(leftOver, charset);
224                leftOver = null;
225            }
226
227            return line;
228        }
229
230        /**
231         * Handles block rollover
232         *
233         * @return the new FilePart or null
234         * @throws IOException if there was a problem reading the file
235         */
236        private FilePart rollOver() throws IOException {
237
238            if (currentLastBytePos > -1) {
239                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
240                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
241            }
242
243            if (no > 1) {
244                return new FilePart(no - 1, blockSize, leftOver);
245            }
246            // NO 1 was the last FilePart, we're finished
247            if (leftOver != null) {
248                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
249                        + new String(leftOver, charset));
250            }
251            return null;
252        }
253    }
254
255    private static final String EMPTY_STRING = "";
256
257    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
258
259    /**
260     * Constructs a new {@link Builder}.
261     *
262     * @return a new {@link Builder}.
263     * @since 2.12.0
264     */
265    public static Builder builder() {
266        return new Builder();
267    }
268
269    private final int blockSize;
270    private final Charset charset;
271    private final SeekableByteChannel channel;
272    private final long totalByteLength;
273    private final long totalBlockCount;
274    private final byte[][] newLineSequences;
275    private final int avoidNewlineSplitBufferSize;
276    private final int byteDecrement;
277    private FilePart currentFilePart;
278    private boolean trailingNewlineOfFileSkipped;
279
280    /**
281     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
282     * platform's default encoding.
283     *
284     * @param file the file to be read
285     * @throws IOException if an I/O error occurs.
286     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
287     */
288    @Deprecated
289    public ReversedLinesFileReader(final File file) throws IOException {
290        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
291    }
292
293    /**
294     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
295     * specified encoding.
296     *
297     * @param file    the file to be read
298     * @param charset the charset to use, null uses the default Charset.
299     * @throws IOException if an I/O error occurs.
300     * @since 2.5
301     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
302     */
303    @Deprecated
304    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
305        this(file.toPath(), charset);
306    }
307
308    /**
309     * Constructs a ReversedLinesFileReader with the given block size and encoding.
310     *
311     * @param file      the file to be read
312     * @param blockSize size of the internal buffer (for ideal performance this
313     *                  should match with the block size of the underlying file
314     *                  system).
315     * @param charset  the encoding of the file, null uses the default Charset.
316     * @throws IOException if an I/O error occurs.
317     * @since 2.3
318     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
319     */
320    @Deprecated
321    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
322        this(file.toPath(), blockSize, charset);
323    }
324
325    /**
326     * Constructs a ReversedLinesFileReader with the given block size and encoding.
327     *
328     * @param file      the file to be read
329     * @param blockSize size of the internal buffer (for ideal performance this
330     *                  should match with the block size of the underlying file
331     *                  system).
332     * @param charsetName  the encoding of the file, null uses the default Charset.
333     * @throws IOException                                  if an I/O error occurs
334     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
335     *                                                      {@link UnsupportedEncodingException}
336     *                                                      in version 2.2 if the
337     *                                                      encoding is not
338     *                                                      supported.
339     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
340     */
341    @Deprecated
342    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
343        this(file.toPath(), blockSize, charsetName);
344    }
345
346    /**
347     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
348     * specified encoding.
349     *
350     * @param file    the file to be read
351     * @param charset the charset to use, null uses the default Charset.
352     * @throws IOException if an I/O error occurs.
353     * @since 2.7
354     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
355     */
356    @Deprecated
357    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
358        this(file, DEFAULT_BLOCK_SIZE, charset);
359    }
360
361    /**
362     * Constructs a ReversedLinesFileReader with the given block size and encoding.
363     *
364     * @param file      the file to be read
365     * @param blockSize size of the internal buffer (for ideal performance this
366     *                  should match with the block size of the underlying file
367     *                  system).
368     * @param charset  the encoding of the file, null uses the default Charset.
369     * @throws IOException if an I/O error occurs.
370     * @since 2.7
371     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
372     */
373    @Deprecated
374    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
375        this.blockSize = blockSize;
376        this.charset = Charsets.toCharset(charset);
377
378        // --- check & prepare encoding ---
379        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
380        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
381        if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
382            // all one byte encodings are no problem
383            byteDecrement = 1;
384        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
385        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
386                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
387                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
388                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
389                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
390            byteDecrement = 1;
391        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
392            // UTF-16 new line sequences are not allowed as second tuple of four byte
393            // sequences,
394            // however byte order has to be specified
395            byteDecrement = 2;
396        } else if (this.charset == StandardCharsets.UTF_16) {
397            throw new UnsupportedEncodingException(
398                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
399        } else {
400            throw new UnsupportedEncodingException(
401                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
402        }
403
404        // NOTE: The new line sequences are matched in the order given, so it is
405        // important that \r\n is BEFORE \n
406        this.newLineSequences = new byte[][] {
407            StandardLineSeparator.CRLF.getBytes(this.charset),
408            StandardLineSeparator.LF.getBytes(this.charset),
409            StandardLineSeparator.CR.getBytes(this.charset)
410        };
411
412        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
413
414        // Open file
415        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
416        this.totalByteLength = channel.size();
417        int lastBlockLength = (int) (this.totalByteLength % blockSize);
418        if (lastBlockLength > 0) {
419            this.totalBlockCount = this.totalByteLength / blockSize + 1;
420        } else {
421            this.totalBlockCount = this.totalByteLength / blockSize;
422            if (this.totalByteLength > 0) {
423                lastBlockLength = blockSize;
424            }
425        }
426        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
427
428    }
429
430    /**
431     * Constructs a ReversedLinesFileReader with the given block size and encoding.
432     *
433     * @param file        the file to be read
434     * @param blockSize   size of the internal buffer (for ideal performance this
435     *                    should match with the block size of the underlying file
436     *                    system).
437     * @param charsetName the encoding of the file, null uses the default Charset.
438     * @throws IOException                                  if an I/O error occurs
439     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
440     *                                                      {@link UnsupportedEncodingException}
441     *                                                      in version 2.2 if the
442     *                                                      encoding is not
443     *                                                      supported.
444     * @since 2.7
445     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
446     */
447    @Deprecated
448    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
449        this(file, blockSize, Charsets.toCharset(charsetName));
450    }
451
452    /**
453     * Closes underlying resources.
454     *
455     * @throws IOException if an I/O error occurs.
456     */
457    @Override
458    public void close() throws IOException {
459        channel.close();
460    }
461
462    /**
463     * Returns the lines of the file from bottom to top.
464     *
465     * @return the next line or null if the start of the file is reached
466     * @throws IOException if an I/O error occurs.
467     */
468    public String readLine() throws IOException {
469
470        String line = currentFilePart.readLine();
471        while (line == null) {
472            currentFilePart = currentFilePart.rollOver();
473            if (currentFilePart == null) {
474                // no more FileParts: we're done, leave line set to null
475                break;
476            }
477            line = currentFilePart.readLine();
478        }
479
480        // aligned behavior with BufferedReader that doesn't return a last, empty line
481        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
482            trailingNewlineOfFileSkipped = true;
483            line = readLine();
484        }
485
486        return line;
487    }
488
489    /**
490     * Returns {@code lineCount} lines of the file from bottom to top.
491     * <p>
492     * If there are less than {@code lineCount} lines in the file, then that's what
493     * you get.
494     * </p>
495     * <p>
496     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
497     * </p>
498     *
499     * @param lineCount How many lines to read.
500     * @return A new list
501     * @throws IOException if an I/O error occurs.
502     * @since 2.8.0
503     */
504    public List<String> readLines(final int lineCount) throws IOException {
505        if (lineCount < 0) {
506            throw new IllegalArgumentException("lineCount < 0");
507        }
508        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
509        for (int i = 0; i < lineCount; i++) {
510            final String line = readLine();
511            if (line == null) {
512                return arrayList;
513            }
514            arrayList.add(line);
515        }
516        return arrayList;
517    }
518
519    /**
520     * Returns the last {@code lineCount} lines of the file.
521     * <p>
522     * If there are less than {@code lineCount} lines in the file, then that's what
523     * you get.
524     * </p>
525     *
526     * @param lineCount How many lines to read.
527     * @return A String.
528     * @throws IOException if an I/O error occurs.
529     * @since 2.8.0
530     */
531    public String toString(final int lineCount) throws IOException {
532        final List<String> lines = readLines(lineCount);
533        Collections.reverse(lines);
534        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
535    }
536
537}