/*
 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow.hadoop.planner;

import java.io.IOException;

import cascading.flow.hadoop.HadoopFlowStep;
import cascading.flow.planner.BaseFlowStep;
import cascading.flow.planner.FlowStepJob;
import cascading.management.state.ClientState;
import cascading.stats.FlowStepStats;
import cascading.stats.hadoop.HadoopStepStats;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;

import static cascading.flow.FlowProps.JOB_POLLING_INTERVAL;
import static cascading.stats.CascadingStats.STATS_STORE_INTERVAL;

/**
 * HadoopFlowStepJob drives a single Hadoop MapReduce job on behalf of a
 * {@link cascading.flow.planner.BaseFlowStep}: it submits the job via
 * {@link JobClient}, then lets the {@link FlowStepJob} superclass poll it
 * through the non-blocking callbacks implemented here.
 * <p/>
 * Note: {@link #jobClient} and {@link #runningJob} are null until
 * {@link #internalNonBlockingStart()} has run, so every polling callback
 * must tolerate that not-yet-submitted state.
 */
public class HadoopFlowStepJob extends FlowStepJob<JobConf>
  {
  /** Field currentConf - the fully resolved job configuration this step was planned with */
  private final JobConf currentConf;
  /** Field jobClient - created on start, used to submit {@link #currentConf} */
  private JobClient jobClient;
  /** Field runningJob - handle to the submitted job, null until submission */
  private RunningJob runningJob;

  /** Returns the stats store interval, defaulting to 60 seconds. */
  private static long getStoreInterval( JobConf jobConf )
    {
    return jobConf.getLong( STATS_STORE_INTERVAL, 60 * 1000 );
    }

  /** Returns the job status polling interval, defaulting to 5 seconds. */
  public static long getJobPollingInterval( JobConf jobConf )
    {
    return jobConf.getLong( JOB_POLLING_INTERVAL, 5000 );
    }

  public HadoopFlowStepJob( ClientState clientState, BaseFlowStep flowStep, JobConf currentConf )
    {
    super( clientState, flowStep, getJobPollingInterval( currentConf ), getStoreInterval( currentConf ) );
    this.currentConf = currentConf;

    if( flowStep.isDebugEnabled() )
      flowStep.logDebug( "using polling interval: " + pollingInterval );
    }

  @Override
  public JobConf getConfig()
    {
    return currentConf;
    }

  @Override
  protected FlowStepStats createStepStats( ClientState clientState )
    {
    // the stats object reads jobClient/runningJob lazily through these
    // accessors, so it always sees the current handles once the job starts
    return new HadoopStepStats( flowStep, clientState )
      {
      @Override
      public JobClient getJobClient()
        {
        return jobClient;
        }

      @Override
      public RunningJob getRunningJob()
        {
        return runningJob;
        }
      };
    }

  protected void internalBlockOnStop() throws IOException
    {
    if( runningJob != null && !runningJob.isComplete() )
      runningJob.killJob();
    }

  protected void internalNonBlockingStart() throws IOException
    {
    jobClient = new JobClient( currentConf );
    runningJob = jobClient.submitJob( currentConf );

    flowStep.logInfo( "submitted hadoop job: " + runningJob.getID() );

    if( runningJob.getTrackingURL() != null )
      flowStep.logInfo( "tracking url: " + runningJob.getTrackingURL() );
    }

  protected boolean internalNonBlockingIsSuccessful() throws IOException
    {
    return runningJob != null && runningJob.isSuccessful();
    }

  @Override
  protected boolean isRemoteExecution()
    {
    return !( (HadoopFlowStep) flowStep ).isHadoopLocalMode( getConfig() );
    }

  @Override
  protected Throwable getThrowable()
    {
    // failures are surfaced through job state/counters, not a captured throwable
    return null;
    }

  protected String internalJobId()
    {
    return runningJob.getJobID();
    }

  protected boolean internalNonBlockingIsComplete() throws IOException
    {
    // guard against being polled before submission completes, consistent
    // with internalNonBlockingIsSuccessful(); an unsubmitted job is not complete
    if( runningJob == null )
      return false;

    return runningJob.isComplete();
    }

  /** Logs job state, failure info, and task completion events to aid failure diagnosis. */
  protected void dumpDebugInfo()
    {
    try
      {
      if( runningJob == null )
        return;

      flowStep.logWarn( "hadoop job " + runningJob.getID() + " state at " + JobStatus.getJobRunState( runningJob.getJobState() ) );
      flowStep.logWarn( "failure info: " + runningJob.getFailureInfo() );

      TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents( 0 );
      flowStep.logWarn( "task completion events identify failed tasks" );
      flowStep.logWarn( "task completion events count: " + events.length );

      for( TaskCompletionEvent event : events )
        flowStep.logWarn( "event = " + event );
      }
    catch( IOException exception )
      {
      flowStep.logError( "failed reading task completion events", exception );
      }
    }

  protected boolean internalIsStarted()
    {
    if( runningJob == null )
      return false;

    try
      {
      // map progress > 0 is the first observable sign tasks have begun
      return runningJob.mapProgress() > 0;
      }
    catch( IOException exception )
      {
      flowStep.logWarn( "unable to test for map progress", exception );
      return false;
      }
    }
  }