001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.pipe.assembly;
022    
023    import java.beans.ConstructorProperties;
024    
025    import cascading.flow.FlowProcess;
026    import cascading.operation.aggregator.First;
027    import cascading.pipe.Pipe;
028    import cascading.tuple.Fields;
029    import cascading.tuple.Tuple;
030    import cascading.tuple.TupleEntry;
031    
032    /**
033     * Class FirstBy is used to return the first encountered Tuple in a tuple stream grouping.
034     * <p/>
035     * Typically finding the first Tuple in a tuple stream grouping relies on a {@link cascading.pipe.GroupBy} and a
036     * {@link cascading.operation.aggregator.First} {@link cascading.operation.Aggregator} operation.
037     * <p/>
038     * If the {@code firstFields} argument has custom {@link java.util.Comparator} instances, they will be used
039     * as the GroupBy {@code sortFields}.
040     * <p/>
041     * This SubAssembly also uses the {@link cascading.pipe.assembly.FirstBy.FirstPartials}
042     * {@link cascading.pipe.assembly.AggregateBy.Functor}
043     * to collect field values before the GroupBy operator to reduce IO over the network.
044     * <p/>
045     * This strategy is similar to using {@code combiners}, except no sorting or serialization is invoked and results
046     * in a much simpler mechanism.
047     * <p/>
048     * The {@code threshold} value tells the underlying FirstPartials functions how many unique key counts to accumulate
049     * in the LRU cache, before emitting the least recently used entry.
050     * <p/>
051     * By default, either the value of {@link #AGGREGATE_BY_THRESHOLD} System property or {@link AggregateBy#DEFAULT_THRESHOLD}
052     * will be used.
053     *
054     * @see AggregateBy
055     */
056    public class FirstBy extends AggregateBy
057      {
058      /**
059       * Class CountPartials is a {@link cascading.pipe.assembly.AggregateBy.Functor} that is used to count observed duplicates from the tuple stream.
060       * <p/>
061       * Use this class typically in tandem with a {@link cascading.operation.aggregator.Sum}
062       * {@link cascading.operation.Aggregator} in order to improve counting performance by removing as many values
063       * as possible before the intermediate {@link cascading.pipe.GroupBy} operator.
064       *
065       * @see cascading.pipe.assembly.FirstBy
066       */
067      public static class FirstPartials implements Functor
068        {
069        private final Fields declaredFields;
070    
071        /**
072         * Constructor FirstPartials creates a new FirstPartials instance.
073         *
074         * @param declaredFields of type Fields
075         */
076        public FirstPartials( Fields declaredFields )
077          {
078          this.declaredFields = declaredFields;
079    
080          if( !declaredFields.isDeclarator() || declaredFields.size() != 1 )
081            throw new IllegalArgumentException( "declaredFields should declare only one field name" );
082          }
083    
084        @Override
085        public Fields getDeclaredFields()
086          {
087          return declaredFields;
088          }
089    
090        @Override
091        public Tuple aggregate( FlowProcess flowProcess, TupleEntry args, Tuple context )
092          {
093          if( context == null || args.getFields().compare( context, args.getTuple() ) > 0 )
094            return args.getTupleCopy();
095    
096          return context;
097          }
098    
099        @Override
100        public Tuple complete( FlowProcess flowProcess, Tuple context )
101          {
102          return context;
103          }
104        }
105    
106      /**
107       * Constructor FirstBy creates a new FirstBy instance. Use this constructor when used with a {@link AggregateBy}
108       * instance.
109       *
110       * @param firstFields of type Fields
111       */
112      @ConstructorProperties({"firstFields"})
113      public FirstBy( Fields firstFields )
114        {
115        super( firstFields, new FirstPartials( firstFields ), new First( firstFields ) );
116        }
117    
118      /**
119       * Constructor FirstBy creates a new FirstBy instance. Use this constructor when used with a {@link AggregateBy}
120       * instance.
121       *
122       * @param firstFields of type Fields
123       */
124      @ConstructorProperties({"argumentFields", "firstFields"})
125      public FirstBy( Fields argumentFields, Fields firstFields )
126        {
127        super( argumentFields, new FirstPartials( argumentFields ), new First( firstFields ) );
128        }
129    
130      ///////
131    
132      /**
133       * Constructor FirstBy creates a new FirstBy instance.
134       *
135       * @param pipe           of type Pipe
136       * @param groupingFields of type Fields
137       * @param firstFields    of type Fields
138       */
139      @ConstructorProperties({"pipe", "groupingFields", "firstFields"})
140      public FirstBy( Pipe pipe, Fields groupingFields, Fields firstFields )
141        {
142        this( null, pipe, groupingFields, firstFields );
143        }
144    
145      /**
146       * Constructor FirstBy creates a new FirstBy instance.
147       *
148       * @param pipe           of type Pipe
149       * @param groupingFields of type Fields
150       * @param firstFields    fo type Fields
151       * @param threshold      of type int
152       */
153      @ConstructorProperties({"pipe", "groupingFields", "firstFields", "threshold"})
154      public FirstBy( Pipe pipe, Fields groupingFields, Fields firstFields, int threshold )
155        {
156        this( null, pipe, groupingFields, firstFields, threshold );
157        }
158    
159      /**
160       * Constructor FirstBy creates a new FirstBy instance.
161       *
162       * @param name           of type String
163       * @param pipe           of type Pipe
164       * @param groupingFields of type Fields
165       * @param firstFields    of type Fields
166       */
167      @ConstructorProperties({"name", "pipe", "groupingFields", "firstFields"})
168      public FirstBy( String name, Pipe pipe, Fields groupingFields, Fields firstFields )
169        {
170        this( name, pipe, groupingFields, firstFields, USE_DEFAULT_THRESHOLD );
171        }
172    
173      /**
174       * Constructor FirstBy creates a new FirstBy instance.
175       *
176       * @param name           of type String
177       * @param pipe           of type Pipe
178       * @param groupingFields of type Fields
179       * @param firstFields    of type Fields
180       * @param threshold      of type int
181       */
182      @ConstructorProperties({"name", "pipe", "groupingFields", "firstFields", "threshold"})
183      public FirstBy( String name, Pipe pipe, Fields groupingFields, Fields firstFields, int threshold )
184        {
185        this( name, Pipe.pipes( pipe ), groupingFields, firstFields, threshold );
186        }
187    
188      /**
189       * Constructor FirstBy creates a new FirstBy instance.
190       *
191       * @param pipes          of type Pipe[]
192       * @param groupingFields of type Fields
193       * @param firstFields    of type Fields
194       */
195      @ConstructorProperties({"pipes", "groupingFields", "firstFields"})
196      public FirstBy( Pipe[] pipes, Fields groupingFields, Fields firstFields )
197        {
198        this( null, pipes, groupingFields, firstFields, USE_DEFAULT_THRESHOLD );
199        }
200    
201      /**
202       * Constructor FirstBy creates a new FirstBy instance.
203       *
204       * @param pipes          of type Pipe[]
205       * @param groupingFields of type Fields
206       * @param firstFields    of type Fields
207       * @param threshold      of type int
208       */
209      @ConstructorProperties({"pipes", "groupingFields", "firstFields", "threshold"})
210      public FirstBy( Pipe[] pipes, Fields groupingFields, Fields firstFields, int threshold )
211        {
212        this( null, pipes, groupingFields, firstFields, threshold );
213        }
214    
215      /**
216       * Constructor FirstBy creates a new FirstBy instance.
217       *
218       * @param name           of type String
219       * @param pipes          of type Pipe[]
220       * @param groupingFields of type Fields
221       * @param firstFields    of type Fields
222       */
223      @ConstructorProperties({"name", "pipes", "groupingFields", "firstFields"})
224      public FirstBy( String name, Pipe[] pipes, Fields groupingFields, Fields firstFields )
225        {
226        this( name, pipes, groupingFields, firstFields, USE_DEFAULT_THRESHOLD );
227        }
228    
229      /**
230       * Constructor FirstBy creates a new FirstBy instance.
231       *
232       * @param name           of type String
233       * @param pipes          of type Pipe[]
234       * @param groupingFields of type Fields
235       * @param firstFields    of type Fields
236       * @param threshold      of type int
237       */
238      @ConstructorProperties({"name", "pipes", "groupingFields", "firstFields", "threshold"})
239      public FirstBy( String name, Pipe[] pipes, Fields groupingFields, Fields firstFields, int threshold )
240        {
241        super( name, pipes, groupingFields, firstFields, new FirstPartials( firstFields ), new First( firstFields ), threshold );
242        }
243      }