001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.flow.hadoop.planner;
022    
023    import java.io.IOException;
024    
025    import cascading.flow.hadoop.HadoopFlowStep;
026    import cascading.flow.planner.BaseFlowStep;
027    import cascading.flow.planner.FlowStepJob;
028    import cascading.management.state.ClientState;
029    import cascading.stats.FlowStepStats;
030    import cascading.stats.hadoop.HadoopStepStats;
031    import org.apache.hadoop.mapred.JobClient;
032    import org.apache.hadoop.mapred.JobConf;
033    import org.apache.hadoop.mapred.JobStatus;
034    import org.apache.hadoop.mapred.RunningJob;
035    import org.apache.hadoop.mapred.TaskCompletionEvent;
036    
037    import static cascading.flow.FlowProps.JOB_POLLING_INTERVAL;
038    import static cascading.stats.CascadingStats.STATS_STORE_INTERVAL;
039    
040    /**
041     *
042     */
043    public class HadoopFlowStepJob extends FlowStepJob<JobConf>
044      {
045      /** Field currentConf */
046      private final JobConf currentConf;
047      /** Field jobClient */
048      private JobClient jobClient;
049      /** Field runningJob */
050      private RunningJob runningJob;
051    
052      private static long getStoreInterval( JobConf jobConf )
053        {
054        return jobConf.getLong( STATS_STORE_INTERVAL, 60 * 1000 );
055        }
056    
057      public static long getJobPollingInterval( JobConf jobConf )
058        {
059        return jobConf.getLong( JOB_POLLING_INTERVAL, 5000 );
060        }
061    
062      public HadoopFlowStepJob( ClientState clientState, BaseFlowStep flowStep, JobConf currentConf )
063        {
064        super( clientState, flowStep, getJobPollingInterval( currentConf ), getStoreInterval( currentConf ) );
065        this.currentConf = currentConf;
066    
067        if( flowStep.isDebugEnabled() )
068          flowStep.logDebug( "using polling interval: " + pollingInterval );
069        }
070    
071      @Override
072      public JobConf getConfig()
073        {
074        return currentConf;
075        }
076    
077      @Override
078      protected FlowStepStats createStepStats( ClientState clientState )
079        {
080        return new HadoopStepStats( flowStep, clientState )
081        {
082        @Override
083        public JobClient getJobClient()
084          {
085          return jobClient;
086          }
087    
088        @Override
089        public RunningJob getRunningJob()
090          {
091          return runningJob;
092          }
093        };
094        }
095    
096      protected void internalBlockOnStop() throws IOException
097        {
098        if( runningJob != null && !runningJob.isComplete() )
099          runningJob.killJob();
100        }
101    
102      protected void internalNonBlockingStart() throws IOException
103        {
104        jobClient = new JobClient( currentConf );
105        runningJob = jobClient.submitJob( currentConf );
106    
107        flowStep.logInfo( "submitted hadoop job: " + runningJob.getID() );
108    
109        if( runningJob.getTrackingURL() != null )
110          flowStep.logInfo( "tracking url: " + runningJob.getTrackingURL() );
111        }
112    
113      protected boolean internalNonBlockingIsSuccessful() throws IOException
114        {
115        return runningJob != null && runningJob.isSuccessful();
116        }
117    
118      @Override
119      protected boolean isRemoteExecution()
120        {
121        return !( (HadoopFlowStep) flowStep ).isHadoopLocalMode( getConfig() );
122        }
123    
124      @Override
125      protected Throwable getThrowable()
126        {
127        return null;
128        }
129    
130      protected String internalJobId()
131        {
132        return runningJob.getJobID();
133        }
134    
135      protected boolean internalNonBlockingIsComplete() throws IOException
136        {
137        return runningJob.isComplete();
138        }
139    
140      protected void dumpDebugInfo()
141        {
142        try
143          {
144          if( runningJob == null )
145            return;
146    
147          flowStep.logWarn( "hadoop job " + runningJob.getID() + " state at " + JobStatus.getJobRunState( runningJob.getJobState() ) );
148          flowStep.logWarn( "failure info: " + runningJob.getFailureInfo() );
149    
150          TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents( 0 );
151          flowStep.logWarn( "task completion events identify failed tasks" );
152          flowStep.logWarn( "task completion events count: " + events.length );
153    
154          for( TaskCompletionEvent event : events )
155            flowStep.logWarn( "event = " + event );
156          }
157        catch( IOException exception )
158          {
159          flowStep.logError( "failed reading task completion events", exception );
160          }
161        }
162    
163      protected boolean internalIsStarted()
164        {
165        if( runningJob == null )
166          return false;
167    
168        try
169          {
170          return runningJob.mapProgress() > 0;
171          }
172        catch( IOException exception )
173          {
174          flowStep.logWarn( "unable to test for map progress", exception );
175          return false;
176          }
177        }
178      }