/*
 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow.hadoop;

import java.io.IOException;
import java.util.Iterator;

import cascading.CascadingException;
import cascading.flow.FlowException;
import cascading.flow.FlowSession;
import cascading.flow.FlowStep;
import cascading.flow.SliceCounters;
import cascading.flow.hadoop.stream.HadoopGroupGate;
import cascading.flow.hadoop.stream.HadoopReduceStreamGraph;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.flow.hadoop.util.TimedIterator;
import cascading.flow.stream.Duct;
import cascading.flow.stream.ElementDuct;
import cascading.tap.Tap;
import cascading.tuple.Tuple;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static cascading.flow.hadoop.util.HadoopUtil.deserializeBase64;
import static cascading.flow.hadoop.util.HadoopUtil.readStateFromDistCache;

/**
 * Class FlowReducer is the Hadoop Reducer implementation. It reconstructs the reduce side
 * pipeline from the serialized {@link HadoopFlowStep} and streams each incoming grouping through it.
 */
public class FlowReducer extends MapReduceBase implements Reducer
  {
  private static final Logger LOG = LoggerFactory.getLogger( FlowReducer.class );

  /** Field streamGraph, the reduce side tuple stream pipeline */
  private HadoopReduceStreamGraph streamGraph;
  /** Field currentProcess */
  private HadoopFlowProcess currentProcess;
  /** Field timedIterator, wraps the incoming values to count tuples read and read duration */
  private TimedIterator timedIterator;

  /** Field calledPrepare, true once the stream graph has been prepared */
  private boolean calledPrepare = false;
  /** Field group, the head of the stream graph, fed by each reduce call */
  private HadoopGroupGate group;

  /** Constructor FlowReducer creates a new FlowReducer instance. */
  public FlowReducer()
    {
    }

  @Override
  public void configure( JobConf jobConf )
    {
    try
      {
      super.configure( jobConf );
      HadoopUtil.initLog4j( jobConf );

      LOG.info( "cascading version: {}", jobConf.get( "cascading.version", "" ) );
      LOG.info( "child jvm opts: {}", jobConf.get( "mapred.child.java.opts", "" ) );

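      // create the flow process for this task; the final boolean flag marks whether the
      // process runs on the map side, so the reducer passes false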
      currentProcess = new HadoopFlowProcess( new FlowSession(), jobConf, false );

      timedIterator = new TimedIterator( currentProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read );

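      // the serialized flow step normally rides in the job conf; if absent, it was shipped
      // via the distributed cache instead, so fall back to reading it from there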
      String stepState = jobConf.getRaw( "cascading.flow.step" );

      if( stepState == null )
        stepState = readStateFromDistCache( jobConf, jobConf.get( FlowStep.CASCADING_FLOW_STEP_ID ) );

      HadoopFlowStep step = deserializeBase64( stepState, jobConf, HadoopFlowStep.class );

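      // assemble the reduce side pipeline; its head is the group gate that will receive
      // each grouping key and its value iterator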
      streamGraph = new HadoopReduceStreamGraph( currentProcess, step );

      group = (HadoopGroupGate) streamGraph.getHeads().iterator().next();

      for( Duct head : streamGraph.getHeads() )
        LOG.info( "sourcing from: {}", ( (ElementDuct) head ).getFlowElement() );

      for( Duct tail : streamGraph.getTails() )
        LOG.info( "sinking to: {}", ( (ElementDuct) tail ).getFlowElement() );

      for( Tap trap : step.getReducerTraps().values() )
        LOG.info( "trapping to: {}", trap );
      }
    catch( Throwable throwable )
      {
      if( throwable instanceof CascadingException )
        throw (CascadingException) throwable;

      throw new FlowException( "internal error during reducer configuration", throwable );
      }
    }

  @Override
  public void reduce( Object key, Iterator values, OutputCollector output, Reporter reporter ) throws IOException
    {
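    // make the current reporter and output collector available to the flow process
    // before streaming any tuples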
    currentProcess.setReporter( reporter );
    currentProcess.setOutputCollector( output );

    timedIterator.reset( values ); // allows us to count read tuples

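    // lazily prepare the pipeline on the first reduce call and start the group gate once;
    // the begin time counter is only recorded if at least one grouping arrives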
    if( !calledPrepare )
      {
      currentProcess.increment( SliceCounters.Process_Begin_Time, System.currentTimeMillis() );

      streamGraph.prepare();

      calledPrepare = true;

      group.start( group );
      }

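    // stream the grouping through the gate; OutOfMemoryError is rethrown untouched so the
    // task fails fast, CascadingExceptions propagate as is, anything else is wrapped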
    try
      {
      group.run( (Tuple) key, timedIterator );
      }
    catch( OutOfMemoryError error )
      {
      throw error;
      }
    catch( Throwable throwable )
      {
      if( throwable instanceof CascadingException )
        throw (CascadingException) throwable;

      throw new FlowException( "internal error during reducer execution", throwable );
      }
    }

  @Override
  public void close() throws IOException
    {
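    // only complete the gate and clean up the graph if prepare actually ran; the end
    // time counter is recorded in all cases, even when close follows a failure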
    try
      {
      if( calledPrepare )
        {
        group.complete( group );

        streamGraph.cleanup();
        }

      super.close();
      }
    finally
      {
      if( currentProcess != null )
        currentProcess.increment( SliceCounters.Process_End_Time, System.currentTimeMillis() );
      }
    }
  }