Skip to content

Commit d8657bc

Browse files
author
Prathyusha Garre
committed
HBASE-29662 - Avoid regionDir/tableDir creation as part of .regioninfo file creation in HRegion initialize
1 parent 47f7e1d commit d8657bc

File tree

13 files changed

+216
-10
lines changed

13 files changed

+216
-10
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,39 @@ public static int getDefaultBufferSize(final FileSystem fs) {
187187
*/
188188
public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
189189
boolean overwrite) throws IOException {
190+
return create(fs, path, perm, overwrite, true);
191+
}
192+
193+
/**
194+
* Create the specified file on the filesystem. By default, this will:
195+
* <ol>
196+
* <li>apply the umask in the configuration (if it is enabled)</li>
197+
* <li>use the fs configured buffer size (or 4096 if not set)</li>
198+
* <li>use the default replication</li>
199+
* <li>use the default block size</li>
200+
* <li>not track progress</li>
201+
* </ol>
202+
* @param fs {@link FileSystem} on which to write the file
203+
* @param path {@link Path} to the file to write
204+
* @param perm initial permissions
205+
* @param overwrite Whether or not the created file should be overwritten.
206+
* @param isRecursiveCreate recursively create parent directories
207+
* @return output stream to the created file
208+
* @throws IOException if the file cannot be created
209+
*/
210+
public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
211+
boolean overwrite, boolean isRecursiveCreate) throws IOException {
190212
if (LOG.isTraceEnabled()) {
191-
LOG.trace("Creating file={} with permission={}, overwrite={}", path, perm, overwrite);
213+
LOG.trace("Creating file={} with permission={}, overwrite={}, recursive={}", path, perm,
214+
overwrite, isRecursiveCreate);
215+
}
216+
if (isRecursiveCreate) {
217+
return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
218+
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
219+
} else {
220+
return fs.createNonRecursive(path, perm, overwrite, getDefaultBufferSize(fs),
221+
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
192222
}
193-
return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
194-
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
195223
}
196224

197225
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/janitor/MetaFixer.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.SortedSet;
3030
import java.util.TreeSet;
3131
import java.util.stream.Collectors;
32+
import org.apache.hadoop.fs.Path;
3233
import org.apache.hadoop.hbase.HConstants;
3334
import org.apache.hadoop.hbase.MetaTableAccessor;
3435
import org.apache.hadoop.hbase.TableName;
@@ -37,9 +38,12 @@
3738
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
3839
import org.apache.hadoop.hbase.client.TableDescriptor;
3940
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
41+
import org.apache.hadoop.hbase.master.MasterFileSystem;
4042
import org.apache.hadoop.hbase.master.MasterServices;
4143
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
44+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
4245
import org.apache.hadoop.hbase.util.Bytes;
46+
import org.apache.hadoop.hbase.util.CommonFSUtils;
4347
import org.apache.hadoop.hbase.util.Pair;
4448
import org.apache.yetus.audience.InterfaceAudience;
4549
import org.slf4j.Logger;
@@ -102,6 +106,7 @@ void fixHoles(CatalogJanitorReport report) {
102106

103107
final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
104108
final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
109+
createRegionDirectories(masterServices, newMetaEntries);
105110
final TransitRegionStateProcedure[] assignProcedures =
106111
masterServices.getAssignmentManager().createRoundRobinAssignProcedures(newMetaEntries);
107112

@@ -217,6 +222,27 @@ private static List<RegionInfo> createMetaEntries(final MasterServices masterSer
217222
return createMetaEntriesSuccesses;
218223
}
219224

225+
private static void createRegionDirectories(final MasterServices masterServices,
226+
final List<RegionInfo> regions) {
227+
if (regions.isEmpty()) {
228+
return;
229+
}
230+
final MasterFileSystem mfs = masterServices.getMasterFileSystem();
231+
final Path rootDir = mfs.getRootDir();
232+
for (RegionInfo regionInfo : regions) {
233+
if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
234+
try {
235+
Path tableDir = CommonFSUtils.getTableDir(rootDir, regionInfo.getTable());
236+
HRegionFileSystem.createRegionOnFileSystem(masterServices.getConfiguration(),
237+
mfs.getFileSystem(), tableDir, regionInfo);
238+
} catch (IOException e) {
239+
LOG.warn("Failed to create region directory for {}: {}",
240+
regionInfo.getRegionNameAsString(), e.getMessage());
241+
}
242+
}
243+
}
244+
}
245+
220246
/**
221247
* Fix overlaps noted in CJ consistency report.
222248
*/

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
109109
setNextState(TruncateRegionState.TRUNCATE_REGION_MAKE_ONLINE);
110110
break;
111111
case TRUNCATE_REGION_MAKE_ONLINE:
112+
createRegionOnFileSystem(env);
112113
addChildProcedure(createAssignProcedures(env));
113114
setNextState(TruncateRegionState.TRUNCATE_REGION_POST_OPERATION);
114115
break;
@@ -130,6 +131,20 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
130131
return Flow.HAS_MORE_STATE;
131132
}
132133

134+
private void createRegionOnFileSystem(final MasterProcedureEnv env) throws IOException {
135+
RegionStateNode regionNode =
136+
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
137+
regionNode.lock();
138+
try {
139+
final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
140+
final Path tableDir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
141+
HRegionFileSystem.createRegionOnFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
142+
tableDir, getRegion());
143+
} finally {
144+
regionNode.unlock();
145+
}
146+
}
147+
133148
private void deleteRegionFromFileSystem(final MasterProcedureEnv env) throws IOException {
134149
RegionStateNode regionNode =
135150
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,10 @@ private static void writeRegionInfoFileContent(final Configuration conf, final F
764764
// First check to get the permissions
765765
FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
766766
// Write the RegionInfo file content
767-
try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) {
767+
// HBASE-29662: Fail .regioninfo file creation, if the region directory doesn't exist,
768+
// avoiding silent masking of missing region directories during region initialization.
769+
// The region directory should already exist when this method is called.
770+
try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null, false)) {
768771
out.write(content);
769772
}
770773
}
@@ -848,6 +851,14 @@ private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, final b
848851
CommonFSUtils.delete(fs, tmpPath, true);
849852
}
850853

854+
// Check parent (region) directory exists first to maintain HBASE-29662 protection
855+
if (!fs.exists(getRegionDir())) {
856+
throw new IOException("Region directory does not exist: " + getRegionDir());
857+
}
858+
if (!fs.exists(getTempDir())) {
859+
fs.mkdirs(getTempDir());
860+
}
861+
851862
// Write HRI to a file in case we need to recover hbase:meta
852863
writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
853864

hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,32 @@ public static boolean deleteRegionDir(final Configuration conf, final RegionInfo
212212
*/
213213
public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path path,
214214
FsPermission perm, InetSocketAddress[] favoredNodes) throws IOException {
215+
return create(conf, fs, path, perm, favoredNodes, true);
216+
}
217+
218+
/**
219+
* Create the specified file on the filesystem. By default, this will:
220+
* <ol>
221+
* <li>overwrite the file if it exists</li>
222+
* <li>apply the umask in the configuration (if it is enabled)</li>
223+
* <li>use the fs configured buffer size (or 4096 if not set)</li>
224+
* <li>use the configured column family replication or default replication if
225+
* {@link ColumnFamilyDescriptorBuilder#DEFAULT_DFS_REPLICATION}</li>
226+
* <li>use the default block size</li>
227+
* <li>not track progress</li>
228+
* </ol>
229+
* @param conf configurations
230+
* @param fs {@link FileSystem} on which to write the file
231+
* @param path {@link Path} to the file to write
232+
* @param perm permissions
233+
* @param favoredNodes favored data nodes
234+
* @param isRecursiveCreate recursively create parent directories
235+
* @return output stream to the created file
236+
* @throws IOException if the file cannot be created
237+
*/
238+
public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path path,
239+
FsPermission perm, InetSocketAddress[] favoredNodes, boolean isRecursiveCreate)
240+
throws IOException {
215241
if (fs instanceof HFileSystem) {
216242
FileSystem backingFs = ((HFileSystem) fs).getBackingFs();
217243
if (backingFs instanceof DistributedFileSystem) {
@@ -230,7 +256,7 @@ public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path
230256
}
231257

232258
}
233-
return CommonFSUtils.create(fs, path, perm, true);
259+
return CommonFSUtils.create(fs, path, perm, true, isRecursiveCreate);
234260
}
235261

236262
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
100100
import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
101101
import org.apache.hadoop.hbase.master.HMaster;
102+
import org.apache.hadoop.hbase.master.MasterFileSystem;
102103
import org.apache.hadoop.hbase.master.RegionState;
103104
import org.apache.hadoop.hbase.master.ServerManager;
104105
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
@@ -109,6 +110,7 @@
109110
import org.apache.hadoop.hbase.regionserver.BloomType;
110111
import org.apache.hadoop.hbase.regionserver.ChunkCreator;
111112
import org.apache.hadoop.hbase.regionserver.HRegion;
113+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
112114
import org.apache.hadoop.hbase.regionserver.HRegionServer;
113115
import org.apache.hadoop.hbase.regionserver.HStore;
114116
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@@ -3722,4 +3724,22 @@ public static void await(final long sleepMillis, final BooleanSupplier condition
37223724
throw e;
37233725
}
37243726
}
3727+
3728+
public void createRegionDir(RegionInfo hri) throws IOException {
3729+
Path rootDir = getDataTestDir();
3730+
Path tableDir = CommonFSUtils.getTableDir(rootDir, hri.getTable());
3731+
Path regionDir = new Path(tableDir, hri.getEncodedName());
3732+
FileSystem fs = getTestFileSystem();
3733+
if (!fs.exists(regionDir)) {
3734+
fs.mkdirs(regionDir);
3735+
}
3736+
}
3737+
3738+
public void createRegionDir(RegionInfo regionInfo, MasterFileSystem masterFileSystem)
3739+
throws IOException {
3740+
Path tableDir =
3741+
CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), regionInfo.getTable());
3742+
HRegionFileSystem.createRegionOnFileSystem(conf, masterFileSystem.getFileSystem(), tableDir,
3743+
regionInfo);
3744+
}
37253745
}

hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestCoreRegionCoprocessor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,14 @@ public void before() throws IOException {
7676
this.rss = new MockRegionServerServices(HTU.getConfiguration());
7777
ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
7878
MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
79+
HTU.createRegionDir(ri);
7980
this.region = HRegion.openHRegion(ri, td, null, HTU.getConfiguration(), this.rss, null);
8081
}
8182

8283
@After
8384
public void after() throws IOException {
8485
this.region.close();
86+
HTU.cleanupTestDir();
8587
}
8688

8789
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestMetaFixer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, Reg
172172
throws IOException {
173173
RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
174174
.setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
175+
TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem());
175176
MetaTableAccessor.putsToMetaTable(services.getConnection(),
176177
Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
177178
EnvironmentEdgeManager.currentTime())));

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveConcurrentClose.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,11 @@ private HRegion initHRegion(TableDescriptor htd, RegionInfo info) throws IOExcep
188188
CommonFSUtils.setRootDir(walConf, tableDir);
189189
final WALFactory wals = new WALFactory(walConf, "log_" + info.getEncodedName());
190190
HRegion region = new HRegion(fs, wals.getWAL(info), conf, htd, null);
191-
191+
Path regionDir = new Path(tableDir, info.getEncodedName());
192+
if (!fs.getFileSystem().exists(regionDir)) {
193+
fs.getFileSystem().mkdirs(regionDir);
194+
}
192195
region.initialize();
193-
194196
return region;
195197
}
196198

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveIOException.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ private HRegion initHRegion(TableDescriptor htd, RegionInfo info) throws IOExcep
198198
.rename(eq(new Path(storeDir, ERROR_FILE)), any());
199199

200200
HRegionFileSystem fs = new HRegionFileSystem(conf, errFS, tableDir, info);
201+
fs.createRegionOnFileSystem(conf, fs.getFileSystem(), tableDir, info);
201202
final Configuration walConf = new Configuration(conf);
202203
CommonFSUtils.setRootDir(walConf, tableDir);
203204
final WALFactory wals = new WALFactory(walConf, "log_" + info.getEncodedName());

0 commit comments

Comments
 (0)