MAPREDUCE-6324. Fixed MapReduce uber jobs to not fail the update of AM-RM tokens when they roll over. Contributed by Jason Lowe.
vinodkv committed Apr 27, 2015
1 parent 32cd2c8 commit 9fc32c5
Showing 3 changed files with 173 additions and 11 deletions.
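In brief: uber jobs run their tasks inside the MRAppMaster and heartbeat to the RM through LocalContainerAllocator, which previously discarded the AllocateResponse. When the RM rolled over its AM-RM master key, the replacement token carried in that response was never installed, so later allocate calls could fail to authenticate. The patch keeps the response, tracks its response ID, and installs any new AM-RM token into the current UGI. The sketch below condenses that pattern into a standalone helper; the class and method names are illustrative and are not part of the patch.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;

/** Illustrative helper only; the actual change is in LocalContainerAllocator below. */
class AmRmTokenRollOverSketch {
  /** Install the rolled-over AM-RM token carried by an allocate response, if any. */
  static void maybeUpdateAmRmToken(AllocateResponse response, Configuration conf)
      throws IOException {
    Token rolled = response.getAMRMToken();  // non-null only after a master-key roll-over
    if (rolled == null) {
      return;
    }
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken =
        new org.apache.hadoop.security.token.Token<AMRMTokenIdentifier>(
            rolled.getIdentifier().array(), rolled.getPassword().array(),
            new Text(rolled.getKind()), new Text(rolled.getService()));
    // Adding under the RM-supplied service key supersedes the old AM-RM token in the
    // UGI's credentials; the service is then re-pointed at the scheduler address so
    // the RPC layer can find the token on the next connection.
    UserGroupInformation.getCurrentUser().addToken(amrmToken);
    amrmToken.setService(ClientRMProxy.getAMRMTokenService(conf));
  }
}
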
4 changes: 4 additions & 0 deletions hadoop-mapreduce-project/CHANGES.txt
@@ -3,6 +3,7 @@ Hadoop MapReduce Change Log
Trunk (Unreleased)

INCOMPATIBLE CHANGES

MAPREDUCE-5785. Derive heap size or mapreduce.*.memory.mb automatically.
(Gera Shegalov and Karthik Kambatla via gera)

@@ -372,6 +373,9 @@ Release 2.7.1 - UNRELEASED
MAPREDUCE-6238. MR2 can't run local jobs with -libjars command options
which is a regression from MR1 (zxu via rkanter)

MAPREDUCE-6324. Fixed MapReduce uber jobs to not fail the update of AM-RM
tokens when they roll over. (Jason Lowe via vinodkv)

Release 2.7.0 - 2015-04-20

INCOMPATIBLE CHANGES
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java
@@ -18,11 +18,13 @@

package org.apache.hadoop.mapreduce.v2.app.local;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
@@ -35,17 +37,22 @@
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent;
import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;

/**
* Allocates containers locally. Doesn't allocate a real container;
@@ -99,8 +106,9 @@ protected synchronized void heartbeat() throws Exception {
AllocateRequest.newInstance(this.lastResponseID,
super.getApplicationProgress(), new ArrayList<ResourceRequest>(),
new ArrayList<ContainerId>(), null);
AllocateResponse allocateResponse = null;
try {
scheduler.allocate(allocateRequest);
allocateResponse = scheduler.allocate(allocateRequest);
// Reset retry count if no exception occurred.
retrystartTime = System.currentTimeMillis();
} catch (ApplicationAttemptNotFoundException e) {
@@ -131,6 +139,24 @@ protected synchronized void heartbeat() throws Exception {
// continue to attempt to contact the RM.
throw e;
}

if (allocateResponse != null) {
this.lastResponseID = allocateResponse.getResponseId();
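// An AMRMToken in the response means the RM rolled over its master key;
// pick up the new token now or subsequent allocate calls may fail to authenticate.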
Token token = allocateResponse.getAMRMToken();
if (token != null) {
updateAMRMToken(token);
}
}
}

private void updateAMRMToken(Token token) throws IOException {
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken =
new org.apache.hadoop.security.token.Token<AMRMTokenIdentifier>(token
.getIdentifier().array(), token.getPassword().array(), new Text(
token.getKind()), new Text(token.getService()));
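// Adding under the service name sent by the RM supersedes the old AM-RM token in
// the UGI's credentials; the service is then re-pointed at the scheduler address
// so the RPC layer can locate the token on the next connection.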
UserGroupInformation currentUGI = UserGroupInformation.getCurrentUser();
currentUGI.addToken(amrmToken);
amrmToken.setService(ClientRMProxy.getAMRMTokenService(getConfig()));
}

@SuppressWarnings("unchecked")
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java
@@ -22,23 +22,43 @@
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.ClusterInfo;
import org.apache.hadoop.mapreduce.v2.app.client.ClientService;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease;
import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NMToken;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.ipc.RPCUtil;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.Assert;
import org.junit.Test;

@@ -48,8 +68,13 @@ public class TestLocalContainerAllocator {
public void testRMConnectionRetry() throws Exception {
// verify the connection exception is thrown
// if we haven't exhausted the retry interval
ApplicationMasterProtocol mockScheduler =
mock(ApplicationMasterProtocol.class);
when(mockScheduler.allocate(isA(AllocateRequest.class)))
.thenThrow(RPCUtil.getRemoteException(new IOException("forcefail")));
Configuration conf = new Configuration();
LocalContainerAllocator lca = new StubbedLocalContainerAllocator();
LocalContainerAllocator lca =
new StubbedLocalContainerAllocator(mockScheduler);
lca.init(conf);
lca.start();
try {
@@ -63,7 +88,7 @@ public void testRMConnectionRetry() throws Exception {

// verify YarnRuntimeException is thrown when the retry interval has expired
conf.setLong(MRJobConfig.MR_AM_TO_RM_WAIT_INTERVAL_MS, 0);
lca = new StubbedLocalContainerAllocator();
lca = new StubbedLocalContainerAllocator(mockScheduler);
lca.init(conf);
lca.start();
try {
@@ -76,12 +101,84 @@
}
}

@Test
public void testAllocResponseId() throws Exception {
ApplicationMasterProtocol scheduler = new MockScheduler();
Configuration conf = new Configuration();
LocalContainerAllocator lca =
new StubbedLocalContainerAllocator(scheduler);
lca.init(conf);
lca.start();

// do two heartbeats to verify the response ID is being tracked
lca.heartbeat();
lca.heartbeat();
lca.close();
}

@Test
public void testAMRMTokenUpdate() throws Exception {
Configuration conf = new Configuration();
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(
ApplicationId.newInstance(1, 1), 1);
AMRMTokenIdentifier oldTokenId = new AMRMTokenIdentifier(attemptId, 1);
AMRMTokenIdentifier newTokenId = new AMRMTokenIdentifier(attemptId, 2);
Token<AMRMTokenIdentifier> oldToken = new Token<AMRMTokenIdentifier>(
oldTokenId.getBytes(), "oldpassword".getBytes(), oldTokenId.getKind(),
new Text());
Token<AMRMTokenIdentifier> newToken = new Token<AMRMTokenIdentifier>(
newTokenId.getBytes(), "newpassword".getBytes(), newTokenId.getKind(),
new Text());

MockScheduler scheduler = new MockScheduler();
scheduler.amToken = newToken;

final LocalContainerAllocator lca =
new StubbedLocalContainerAllocator(scheduler);
lca.init(conf);
lca.start();

UserGroupInformation testUgi = UserGroupInformation.createUserForTesting(
"someuser", new String[0]);
testUgi.addToken(oldToken);
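// Run the heartbeat as the test user so the allocator installs the rolled-over
// token into this UGI's credentials (it updates UserGroupInformation.getCurrentUser()).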
testUgi.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
lca.heartbeat();
return null;
}
});
lca.close();

// verify there is only one AMRM token in the UGI and it matches the
// updated token from the RM
int tokenCount = 0;
Token<? extends TokenIdentifier> ugiToken = null;
for (Token<? extends TokenIdentifier> token : testUgi.getTokens()) {
if (AMRMTokenIdentifier.KIND_NAME.equals(token.getKind())) {
ugiToken = token;
++tokenCount;
}
}

Assert.assertEquals("too many AMRM tokens", 1, tokenCount);
Assert.assertArrayEquals("token identifier not updated",
newToken.getIdentifier(), ugiToken.getIdentifier());
Assert.assertArrayEquals("token password not updated",
newToken.getPassword(), ugiToken.getPassword());
Assert.assertEquals("AMRM token service not updated",
new Text(ClientRMProxy.getAMRMTokenService(conf)),
ugiToken.getService());
}

private static class StubbedLocalContainerAllocator
extends LocalContainerAllocator {
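// The ApplicationMasterProtocol is injected by each test: a Mockito mock that
// always fails for the retry tests, or a MockScheduler that verifies response IDs
// and hands out a rolled-over AM-RM token.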
private ApplicationMasterProtocol scheduler;

public StubbedLocalContainerAllocator() {
public StubbedLocalContainerAllocator(ApplicationMasterProtocol scheduler) {
super(mock(ClientService.class), createAppContext(),
"nmhost", 1, 2, null);
this.scheduler = scheduler;
}

@Override
@@ -99,13 +196,6 @@ protected void startAllocatorThread() {

@Override
protected ApplicationMasterProtocol createSchedulerProxy() {
ApplicationMasterProtocol scheduler = mock(ApplicationMasterProtocol.class);
try {
when(scheduler.allocate(isA(AllocateRequest.class)))
.thenThrow(RPCUtil.getRemoteException(new IOException("forcefail")));
} catch (YarnException e) {
} catch (IOException e) {
}
return scheduler;
}

@@ -126,4 +216,46 @@ private static AppContext createAppContext() {
return ctx;
}
}

private static class MockScheduler implements ApplicationMasterProtocol {
int responseId = 0;
Token<AMRMTokenIdentifier> amToken = null;

@Override
public RegisterApplicationMasterResponse registerApplicationMaster(
RegisterApplicationMasterRequest request) throws YarnException,
IOException {
return null;
}

@Override
public FinishApplicationMasterResponse finishApplicationMaster(
FinishApplicationMasterRequest request) throws YarnException,
IOException {
return null;
}

@Override
public AllocateResponse allocate(AllocateRequest request)
throws YarnException, IOException {
Assert.assertEquals("response ID mismatch",
responseId, request.getResponseId());
++responseId;
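// Wrap the configured AM-RM token, if any, as a YARN-level Token record, just as
// the RM would after rolling over its master key.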
org.apache.hadoop.yarn.api.records.Token yarnToken = null;
if (amToken != null) {
yarnToken = org.apache.hadoop.yarn.api.records.Token.newInstance(
amToken.getIdentifier(), amToken.getKind().toString(),
amToken.getPassword(), amToken.getService().toString());
}
return AllocateResponse.newInstance(responseId,
Collections.<ContainerStatus>emptyList(),
Collections.<Container>emptyList(),
Collections.<NodeReport>emptyList(),
Resources.none(), null, 1, null,
Collections.<NMToken>emptyList(),
yarnToken,
Collections.<ContainerResourceIncrease>emptyList(),
Collections.<ContainerResourceDecrease>emptyList());
}
}
}
