Skip to content

Commit

Permalink
HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPService…
Browse files Browse the repository at this point in the history
…Actor goes for infinite loop. Contributed by Rushabh Shah.
  • Loading branch information
kihwal committed May 11, 2015
1 parent cbea5d2 commit ea11590
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 0 deletions.
3 changes: 3 additions & 0 deletions hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,9 @@ Release 2.7.1 - UNRELEASED
HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block
request is in edit log. (Rushabh S Shah via kihwal)

HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor
goes for infinite loop (Rushabh S Shah via kihwal)

Release 2.7.0 - 2015-04-20

INCOMPATIBLE CHANGES
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.ipc.RemoteException;


/**
Expand All @@ -43,6 +44,9 @@ public void reportTo(DatanodeProtocolClientSideTranslatorPB bpNamenode,
DatanodeRegistration bpRegistration) throws BPServiceActorActionException {
try {
bpNamenode.errorReport(bpRegistration, errorCode, errorMessage);
} catch (RemoteException re) {
DataNode.LOG.info("trySendErrorReport encountered RemoteException "
+ "errorMessage: " + errorMessage + " errorCode: " + errorCode, re);
} catch(IOException e) {
throw new BPServiceActorActionException("Error reporting "
+ "an error to namenode: ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.ipc.RemoteException;

/**
* ReportBadBlockAction is an instruction issued by {{BPOfferService}} to
Expand Down Expand Up @@ -59,6 +60,9 @@ public void reportTo(DatanodeProtocolClientSideTranslatorPB bpNamenode,

try {
bpNamenode.reportBadBlocks(locatedBlock);
} catch (RemoteException re) {
DataNode.LOG.info("reportBadBlock encountered RemoteException for "
+ "block: " + block , re);
} catch (IOException e) {
throw new BPServiceActorActionException("Failed to report bad block "
+ block + " to namenode: ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.hadoop.util.Time;
Expand Down Expand Up @@ -621,4 +624,55 @@ public Void answer(InvocationOnMock invocation) throws Throwable {
bpos.stop();
}
}

/**
* This test case doesn't add the reportBadBlock request to
* {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws
* {@link StandbyException}
* @throws Exception
*/
@Test
public void testReportBadBlocksWhenNNThrowsStandbyException()
throws Exception {
BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
bpos.start();
try {
waitForInitialization(bpos);
// Should start with neither NN as active.
assertNull(bpos.getActiveNN());
// Have NN1 claim active at txid 1
mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
bpos.triggerHeartbeatForTests();
// Now mockNN1 is acting like active namenode and mockNN2 as Standby
assertSame(mockNN1, bpos.getActiveNN());
// Return nothing when active Active Namenode calls reportBadBlocks
Mockito.doNothing().when(mockNN1).reportBadBlocks
(Mockito.any(LocatedBlock[].class));

RemoteException re = new RemoteException(StandbyException.class.
getName(), "Operation category WRITE is not supported in state "
+ "standby", RpcErrorCodeProto.ERROR_APPLICATION);
// Return StandbyException wrapped in RemoteException when Standby NN
// calls reportBadBlocks
Mockito.doThrow(re).when(mockNN2).reportBadBlocks
(Mockito.any(LocatedBlock[].class));

bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK)
.getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK)
.getStorageType());
// Send heartbeat so that the BpServiceActor can report bad block to
// namenode
bpos.triggerHeartbeatForTests();
Mockito.verify(mockNN2, Mockito.times(1))
.reportBadBlocks(Mockito.any(LocatedBlock[].class));

// Trigger another heartbeat, this will send reportBadBlock again if it
// is present in the queue.
bpos.triggerHeartbeatForTests();
Mockito.verify(mockNN2, Mockito.times(1))
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
} finally {
bpos.stop();
}
}
}

0 comments on commit ea11590

Please sign in to comment.