Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ private DatanodeDetails initializeDatanodeDetails()
File idFile = new File(idFilePath);
DatanodeDetails details;
if (idFile.exists()) {
details = ContainerUtils.readDatanodeDetailsFrom(idFile);
details = ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
} else {
// There is no datanode.id file, this might be the first time datanode
// is started.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.Objects;
import java.util.Properties;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -49,10 +51,12 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.utils.HddsServerUtil;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.slf4j.Logger;
Expand Down Expand Up @@ -166,25 +170,68 @@ public static synchronized void writeDatanodeDetailsTo(
* @return {@link DatanodeDetails}
* @throws IOException If the id file is malformed or other I/O exceptions
*/
public static synchronized DatanodeDetails readDatanodeDetailsFrom(File path)
throws IOException {
public static synchronized DatanodeDetails readDatanodeDetailsFrom(
File path, ConfigurationSource conf) throws IOException {
if (!path.exists()) {
throw new IOException("Datanode ID file not found.");
}
try {
return DatanodeIdYaml.readDatanodeIdFile(path);
} catch (IOException e) {
LOG.warn("Error loading DatanodeDetails yaml from {}",
path.getAbsolutePath(), e);
// Try to load as protobuf before giving up
try (InputStream in = Files.newInputStream(path.toPath())) {
return DatanodeDetails.getFromProtoBuf(
HddsProtos.DatanodeDetailsProto.parseFrom(in));
} catch (IOException io) {
throw new IOException("Failed to parse DatanodeDetails from "
+ path.getAbsolutePath(), io);
LOG.warn("Failed to read Datanode ID file as YAML. " +
"Attempting recovery.", e);
try {
return recoverDatanodeDetailsFromVersionFile(path, conf);
} catch (IOException recoveryEx) {
LOG.warn("Datanode ID recovery from VERSION file failed. " +
"Falling back to reading as Protobuf.", recoveryEx);
try {
return readDatanodeDetailsFromProto(path);
} catch (IOException io) {
throw new IOException("Failed to parse DatanodeDetails from "
+ path.getAbsolutePath(), io);
}
}
}
}

/**
 * Recover DatanodeDetails from VERSION file.
 *
 * <p>Looks for a VERSION file under each configured datanode storage
 * directory and takes the first datanode UUID that is non-empty and
 * well-formed. The returned details carry only that UUID, and are written
 * back to {@code path} before returning.
 *
 * @param path the datanode ID file to rewrite with the recovered details
 * @param conf configuration used to look up the datanode storage
 *             directories and passed on when rewriting the ID file
 * @return a {@link DatanodeDetails} built from the recovered UUID
 * @throws IOException if no VERSION file yields a usable UUID, or if
 *                     reading a VERSION file or rewriting the ID file fails
 */
private static DatanodeDetails recoverDatanodeDetailsFromVersionFile(
    File path, ConfigurationSource conf) throws IOException {
  LOG.info("Attempting to recover Datanode ID from VERSION file.");
  Collection<String> dataNodeDirs =
      HddsServerUtil.getDatanodeStorageDirs(conf);
  UUID recoveredUuid = null;
  for (String dataNodeDir : dataNodeDirs) {
    File versionFile = new File(dataNodeDir,
        HddsVolume.HDDS_VOLUME_DIR + "/" + StorageVolumeUtil.VERSION_FILE);
    if (!versionFile.exists()) {
      continue;
    }
    Properties props = DatanodeVersionFile.readFrom(versionFile);
    String dnUuid = props.getProperty(OzoneConsts.DATANODE_UUID);
    if (dnUuid == null || dnUuid.isEmpty()) {
      continue;
    }
    try {
      // Parse inside the loop: an unparsable value must not escape as an
      // unchecked exception, because the caller's remaining fallback only
      // runs when this method fails with an IOException.
      recoveredUuid = UUID.fromString(dnUuid);
      break;
    } catch (IllegalArgumentException e) {
      LOG.warn("Ignoring malformed datanode UUID '{}' in {}",
          dnUuid, versionFile.getAbsolutePath(), e);
    }
  }
  if (recoveredUuid == null) {
    throw new IOException("Could not find a valid datanode UUID from " +
        "any VERSION file in " + dataNodeDirs);
  }
  DatanodeDetails datanodeDetails = DatanodeDetails.newBuilder()
      .setUuid(recoveredUuid)
      .build();
  DatanodeIdYaml.createDatanodeIdFile(datanodeDetails, path, conf);
  LOG.info("Successfully recovered and rewrote datanode ID file.");
  return datanodeDetails;
}

/**
 * Reads the given file as a serialized
 * {@link HddsProtos.DatanodeDetailsProto} and converts the parsed message
 * into a {@link DatanodeDetails}.
 *
 * @param path file holding the protobuf-encoded datanode details
 * @return the details converted from the parsed message
 * @throws IOException if the file cannot be opened or parsed
 */
private static DatanodeDetails readDatanodeDetailsFromProto(File path)
    throws IOException {
  try (InputStream protoStream = Files.newInputStream(path.toPath())) {
    HddsProtos.DatanodeDetailsProto parsed =
        HddsProtos.DatanodeDetailsProto.parseFrom(protoStream);
    return DatanodeDetails.getFromProtoBuf(parsed);
  }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ public static DatanodeDetails readDatanodeIdFile(File path)
throw new IOException("Unable to parse yaml file.", e);
}

if (datanodeDetailsYaml == null
|| datanodeDetailsYaml.getUuid() == null
|| datanodeDetailsYaml.getUuid().isEmpty()) {
throw new IOException(
"Datanode ID file is empty or has null UUID: " + path.getAbsolutePath());
}

DatanodeDetails.Builder builder = DatanodeDetails.newBuilder();
builder.setUuid(UUID.fromString(datanodeDetailsYaml.getUuid()))
.setIpAddress(datanodeDetailsYaml.getIpAddress())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
*/
public final class StorageVolumeUtil {

private static final String VERSION_FILE = "VERSION";
public static final String VERSION_FILE = "VERSION";
private static final String STORAGE_ID_PREFIX = "DS-";

private StorageVolumeUtil() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static org.apache.hadoop.ozone.container.ContainerTestHelper.getDummyCommandRequestProto;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.when;
Expand All @@ -37,6 +38,7 @@
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.UUID;
import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
Expand All @@ -45,6 +47,7 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.ByteStringConversion;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.ozone.common.ChunkBuffer;
import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat;
import org.junit.jupiter.api.BeforeEach;
Expand Down Expand Up @@ -125,34 +128,71 @@ public void testDatanodeIDPersistent(@TempDir File tempDir) throws Exception {
// Read should return an empty value if file doesn't exist
File nonExistFile = new File(tempDir, "non_exist.id");
assertThrows(IOException.class,
() -> ContainerUtils.readDatanodeDetailsFrom(nonExistFile));
() -> ContainerUtils.readDatanodeDetailsFrom(nonExistFile, conf));

// Read should fail if the file is malformed
File malformedFile = new File(tempDir, "malformed.id");
createMalformedIDFile(malformedFile);
assertThrows(IOException.class,
() -> ContainerUtils.readDatanodeDetailsFrom(malformedFile));
() -> ContainerUtils.readDatanodeDetailsFrom(malformedFile, conf));

// Test upgrade scenario - protobuf file instead of yaml
File protoFile = new File(tempDir, "valid-proto.id");
try (OutputStream out = Files.newOutputStream(protoFile.toPath())) {
HddsProtos.DatanodeDetailsProto proto = id1.getProtoBufMessage();
proto.writeTo(out);
}
assertDetailsEquals(id1, ContainerUtils.readDatanodeDetailsFrom(protoFile));
assertDetailsEquals(id1, ContainerUtils.readDatanodeDetailsFrom(protoFile, conf));

id1.setInitialVersion(1);
assertWriteRead(tempDir, id1);
}
}

/**
 * Verifies that reading an empty datanode ID file falls back to the UUID
 * stored in a storage directory's VERSION file and rewrites the ID file.
 */
@Test
public void testDatanodeIdRecovery(@TempDir File tempDir) throws IOException {
  // Arrange: one configured storage directory whose hdds/VERSION file
  // carries a known datanode UUID.
  final String expectedUuid = UUID.randomUUID().toString();
  final File volumeRoot = new File(tempDir, "datanode-storage");
  assertTrue(volumeRoot.mkdirs());
  conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, volumeRoot.getAbsolutePath());

  final File hddsDir = new File(volumeRoot, "hdds");
  assertTrue(hddsDir.mkdirs());
  final File versionPath = new File(hddsDir, "VERSION");
  new DatanodeVersionFile(
      "storage-id", "cluster-id", expectedUuid, System.currentTimeMillis(), 0)
      .createVersionFile(versionPath);

  // Arrange: an ID file that exists but holds no content.
  final File idFile = new File(tempDir, "datanode.id");
  assertTrue(idFile.createNewFile());
  assertEquals(0, idFile.length(), "Datanode ID file should be empty initially");

  // Act: the read has to recover from the VERSION file.
  final DatanodeDetails recovered =
      ContainerUtils.readDatanodeDetailsFrom(idFile, conf);

  // Assert: the UUID comes from the VERSION file and the ID file now has
  // content.
  assertEquals(expectedUuid, recovered.getUuidString());
  assertTrue(idFile.length() > 0, "Datanode ID file should have been recreated with content");

  // Assert: a second read of the rewritten file yields the same UUID.
  final DatanodeDetails reread =
      ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
  assertEquals(expectedUuid, reread.getUuidString());
}

private void assertWriteRead(@TempDir File tempDir,
DatanodeDetails details) throws IOException {
// Write a single ID to the file and read it out
File file = new File(tempDir, "valid-values.id");
ContainerUtils.writeDatanodeDetailsTo(details, file, conf);

DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file);
DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file, conf);

assertDetailsEquals(details, read);
assertEquals(details.getCurrentVersion(), read.getCurrentVersion());
Expand All @@ -163,7 +203,7 @@ private void assertWriteReadWithChangedIpAddress(@TempDir File tempDir,
// Write a single ID to the file and read it out
File file = new File(tempDir, "valid-values.id");
ContainerUtils.writeDatanodeDetailsTo(details, file, conf);
DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file);
DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file, conf);
assertEquals(details.getIpAddress(), read.getIpAddress());
read.validateDatanodeIpAddress();
assertEquals("127.0.0.1", read.getIpAddress());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public static DatanodeDetails getDatanodeDetails(OzoneConfiguration conf)
File idFile = new File(idFilePath);
Preconditions.checkState(idFile.exists(),
"Datanode id file: " + idFilePath + " not exists");
return ContainerUtils.readDatanodeDetailsFrom(idFile);
return ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
}

public static File getVolumeUpgradeCompleteFile(HddsVolume volume) {
Expand Down