Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.FastStringPool;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.yetus.audience.InterfaceAudience;

/**
Expand All @@ -38,8 +37,6 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable {

private static final FastStringPool CF_NAME_POOL = new FastStringPool();

// New compressed format using integer file ID (when codec is available)

private final String hfileName;

private final String regionName;
Expand Down Expand Up @@ -90,6 +87,8 @@ public BlockCacheKey(String hfileName, long offset, boolean isPrimaryReplica,
* @param offset Offset of the block into the file
* @param isPrimaryReplica Whether this is from primary replica
* @param blockType Type of block
* @param archived Whether the file is archived or not. This is relevant for proper cache
* metrics computation.
*/
public BlockCacheKey(String hfileName, String cfName, String regionName, long offset,
boolean isPrimaryReplica, BlockType blockType, boolean archived) {
Expand All @@ -105,17 +104,18 @@ public BlockCacheKey(String hfileName, String cfName, String regionName, long of

/**
* Construct a new BlockCacheKey using a file path. File, column family and region information
* will be extracted from the passed path. This should be used when inserting keys into the cache,
* so that region cache metrics are recorded properly.
* will be extracted from the passed path, so that region cache metrics are recorded properly.
* NOTE: This should be used when inserting keys into the cache from tests only, as the region and
* cf name parsing from the path can be costly and induce a 10% performance degradation on the
* read path, when this parsing is performed on each key creation time.
* @param hfilePath The path to the HFile
* @param offset Offset of the block into the file
* @param isPrimaryReplica Whether this is from primary replica
* @param blockType Type of block
*/
public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType) {
this(hfilePath.getName(), hfilePath.getParent().getName(),
hfilePath.getParent().getParent().getName(), offset, isPrimaryReplica, blockType,
HFileArchiveUtil.isHFileArchived(hfilePath));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why here we can use false directly?

@wchevreuil wchevreuil Jun 15, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are actually only using this constructor on tests now. Because the HFileUtils.isArchived call seems too expensive to be performed on every block cache key creation, that check should be delegated to the reader, which can do it once during it's initialisation. Writers don't even need it, since a being written file is guaranteed to be "non-archived", so writers can benefit from constructors that alwas set archived as false. Let me add javadoc to explain this.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then why not just remoev this flag in BlockCacheKey if it does not change?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might change when it's created by a Reader (see HFileReaderImpl#1352).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then can we just remove this constructor? Or leave the code as is since it only calls from test code? It does not affect real performance if it only calls in test code right?

hfilePath.getParent().getParent().getName(), offset, isPrimaryReplica, blockType, false);
}

@Override
Expand Down Expand Up @@ -194,5 +194,4 @@ public void setBlockType(BlockType blockType) {
public boolean isArchived() {
return archived;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.hbase.util.ObjectIntPair;
import org.apache.hadoop.io.WritableUtils;
Expand Down Expand Up @@ -124,6 +125,14 @@ public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
/** Minor versions starting with this number have faked index key */
static final int MINOR_VERSION_WITH_FAKED_KEY = 3;

private final String fileName;

private final String region;

private final String family;

private final boolean archived;

/**
* Opens a HFile.
* @param context Reader context info
Expand All @@ -149,6 +158,10 @@ public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig ca
fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
fileName = path.getName();
family = path.getParent().getName();
region = path.getParent().getParent().getName();
archived = HFileArchiveUtil.isHFileArchived(path);
}

@SuppressWarnings("serial")
Expand Down Expand Up @@ -1246,8 +1259,8 @@ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws
synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
// Check cache for block. If found return.
long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
BlockCacheKey cacheKey =
new BlockCacheKey(path, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
BlockCacheKey cacheKey = new BlockCacheKey(fileName, family, region, metaBlockOffset,
this.isPrimaryReplicaReader(), BlockType.META, archived);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is one of the optimization right? We calculate archived once when constructing the HFileReaderImpl, and do not need to calcuate it every time when constructing a BlockCacheKey?


cacheBlock &=
cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory(), getHFileInfo(), conf);
Expand Down Expand Up @@ -1336,9 +1349,8 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo
// the other choice is to duplicate work (which the cache would prevent you
// from doing).

BlockCacheKey cacheKey =
new BlockCacheKey(path, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);

BlockCacheKey cacheKey = new BlockCacheKey(fileName, family, region, dataBlockOffset,
this.isPrimaryReplicaReader(), expectedBlockType, archived);
boolean useLock = false;
IdLock.Entry lockEntry = null;
final Span span = Span.current();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,21 @@ public void setTimeRangeTrackerForTiering(Supplier<TimeRangeTracker> timeRangeTr
private final TimeRangeTracker timeRangeTracker;
private long earliestPutTs = HConstants.LATEST_TIMESTAMP;

private String regionName;

private String familyName;

public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path,
FSDataOutputStream outputStream, HFileContext fileContext) {
this.outputStream = outputStream;
this.path = path;
this.name = path != null ? path.getName() : outputStream.toString();
if (path != null) {
this.name = path.getName();
this.regionName = path.getParent().getParent().getName();
this.familyName = path.getParent().getName();
} else {
this.name = outputStream.toString();
}
this.hFileContext = fileContext;
// TODO: Move this back to upper layer
this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
Expand Down Expand Up @@ -593,7 +603,7 @@ private void doCacheOnWrite(long offset) {

private BlockCacheKey buildCacheBlockKey(long offset, BlockType blockType) {
if (path != null) {
return new BlockCacheKey(path, offset, true, blockType);
return new BlockCacheKey(name, familyName, regionName, offset, true, blockType, false);
}
return new BlockCacheKey(name, offset, true, blockType);
}
Expand Down
Loading