使用内存文件映射MappedByteBuffer读超大文件可能会遇到的问题
使用 MappedByteBuffer 做内存文件映射时存在一个致命问题:单次映射依然无法读取超大文件(大于 Integer.MAX_VALUE 字节,约 2GB)。FileChannel 的 map 方法对 size 参数有大小限制,从源码可以看到,该参数值大于 Integer.MAX_VALUE 时会直接抛出 IllegalArgumentException("Size exceeds Integer.MAX_VALUE") 异常,所以单个映射并不适合特别大的文件。本质原因是 java.nio.MappedByteBuffer 直接继承自 java.nio.ByteBuffer,而 ByteBuffer 的索引是 int 类型,因此 MappedByteBuffer 最多只能索引到 Integer.MAX_VALUE 的位置,FileChannel 的 map 方法也就必须做这项参数合法性检查。
我们可以通过多个内存文件映射来解决这个问题,具体如下:
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.tompai.common.hash;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.security.MessageDigest;
import org.apache.commons.codec.digest.DigestUtils;
/**
 * Sequential chunk reader for files that are too large for a single
 * {@link MappedByteBuffer}.
 *
 * <p>The file is mapped read-only as consecutive regions of at most
 * {@code Integer.MAX_VALUE} bytes each. {@link #read()} then hands the content
 * out chunk by chunk, and {@link #getCurrentBytes()} exposes the chunk produced
 * by the most recent successful {@code read()}.
 *
 * <p>{@code read()}, {@code getCurrentBytes()} and {@code close()} are
 * synchronized on this instance.
 *
 * <p>Part of tompai-common ({@code BigMappedByteBufferReader.java}),
 * contact liinux@qq.com, created 2021-04-09 23:39:52.
 *
 * @author tompai
 * @version v1.0
 */
public class BigMappedByteBufferReader implements AutoCloseable {

    /** Largest region one mapping may cover; a buffer is indexed with an {@code int}. */
    private static final long MAX_REGION_SIZE = Integer.MAX_VALUE;

    private final MappedByteBuffer[] mappedByteBuffers;
    private final RandomAccessFile inputStream;
    private final FileChannel fileChannel;
    private final int bufferCount;
    private final int byteBufferSize;

    /** Index of the region currently being consumed; equals {@code bufferCount} at end of data. */
    private int bufferCountIndex = 0;

    /** Chunk produced by the latest successful {@link #read()}; {@code null} before the first read and after close. */
    private byte[] byteBuffer;

    /**
     * Opens {@code fileName} read-only and maps its whole content.
     *
     * @param fileName       path of the file to read
     * @param byteBufferSize maximum number of bytes returned per {@link #read()} call; must be positive
     * @throws IllegalArgumentException if {@code byteBufferSize} is not positive
     * @throws IOException              if the file cannot be opened, sized or mapped
     */
    public BigMappedByteBufferReader(String fileName, int byteBufferSize) throws IOException {
        // Reject sizes that would make read() spin on empty chunks (0) or fail on allocation (< 0).
        if (byteBufferSize <= 0) {
            throw new IllegalArgumentException("byteBufferSize must be positive: " + byteBufferSize);
        }

        RandomAccessFile file = new RandomAccessFile(fileName, "r");
        FileChannel channel;
        MappedByteBuffer[] regions;
        try {
            channel = file.getChannel();
            long fileSize = channel.size();

            // Integer ceiling division; an empty file yields zero regions.
            long regionCount = fileSize / MAX_REGION_SIZE + (fileSize % MAX_REGION_SIZE == 0 ? 0 : 1);
            regions = new MappedByteBuffer[(int) regionCount];

            long offset = 0;
            for (int i = 0; i < regions.length; i++) {
                long regionSize = Math.min(MAX_REGION_SIZE, fileSize - offset);
                regions[i] = channel.map(FileChannel.MapMode.READ_ONLY, offset, regionSize);
                offset += regionSize;
            }
        } catch (IOException | RuntimeException e) {
            // Do not leak the open file when sizing or mapping fails.
            try {
                file.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }

        this.inputStream = file;
        this.fileChannel = channel;
        this.mappedByteBuffers = regions;
        this.bufferCount = regions.length;
        this.byteBufferSize = byteBufferSize;
    }

    /**
     * Reads the next chunk and makes it available through {@link #getCurrentBytes()}.
     *
     * <p>A chunk never spans two mapped regions, so the last chunk of a region
     * may be shorter than the configured chunk size. A chunk is never empty.
     *
     * @return the number of bytes in the new chunk (always at least 1), or
     *         {@code -1} when all data has been consumed or the reader is closed
     */
    public synchronized int read() {
        // Step over regions that are fully consumed so an exact-multiple boundary
        // never produces an empty chunk.
        while (bufferCountIndex < bufferCount && !mappedByteBuffers[bufferCountIndex].hasRemaining()) {
            bufferCountIndex++;
        }
        if (bufferCountIndex >= bufferCount) {
            return -1;
        }

        MappedByteBuffer current = mappedByteBuffers[bufferCountIndex];
        int chunkSize = Math.min(byteBufferSize, current.remaining());
        byteBuffer = new byte[chunkSize];
        current.get(byteBuffer);

        if (!current.hasRemaining()) {
            bufferCountIndex++;
        }
        return chunkSize;
    }

    /**
     * Closes the channel and the underlying file and ends the stream:
     * subsequent {@link #read()} calls return {@code -1} and
     * {@link #getCurrentBytes()} returns {@code null}.
     *
     * @throws IOException if closing the channel or the file fails
     */
    @Override
    public synchronized void close() throws IOException {
        // Mark the data as exhausted and drop the buffer references; a mapping
        // is only released once its buffer has been garbage-collected.
        bufferCountIndex = bufferCount;
        for (int i = 0; i < mappedByteBuffers.length; i++) {
            mappedByteBuffers[i] = null;
        }
        byteBuffer = null;

        try {
            fileChannel.close();
        } finally {
            // Close the file even when closing the channel fails.
            inputStream.close();
        }
    }

    /**
     * Returns the chunk produced by the most recent successful {@link #read()}.
     *
     * @return the current chunk, or {@code null} if nothing has been read yet
     *         or the reader has been closed
     */
    public synchronized byte[] getCurrentBytes() {
        return byteBuffer;
    }
}
版权声明:本文为tianshan2010原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。