001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.tap.local;
022    
023    import java.beans.ConstructorProperties;
024    import java.io.IOException;
025    import java.io.OutputStream;
026    import java.util.Properties;
027    
028    import cascading.flow.FlowProcess;
029    import cascading.tap.BaseTemplateTap;
030    import cascading.tap.SinkMode;
031    import cascading.tap.Tap;
032    import cascading.tap.local.io.TapFileOutputStream;
033    import cascading.tuple.Fields;
034    import cascading.tuple.TupleEntrySchemeCollector;
035    
036    /**
037     * Class TemplateTap can be used to write tuple streams out to files and sub-directories based on the values in the {@link cascading.tuple.Tuple}
038     * instance.
039     * <p/>
040     * The constructor takes a {@link FileTap} {@link cascading.tap.Tap} and a {@link java.util.Formatter} format syntax String. This allows
041     * Tuple values at given positions to be used as directory names.
042     * <p/>
043     * {@code openTapsThreshold} limits the number of open files to be output to. This value defaults to 300 files.
044     * Each time the threshold is exceeded, 10% of the least recently used open files will be closed.
045     * <p/>
046     * TemplateTap will populate a given {@code pathTemplate} without regard to case of the values being used. Thus
047     * the resulting paths {@code 2012/June/} and {@code 2012/june/} will likely result in two open files into the same
048     * location. Forcing the case to be consistent with an upstream {@link cascading.operation.Function} is recommended, see
049     * {@link cascading.operation.expression.ExpressionFunction}.
050     */
051    @Deprecated
052    public class TemplateTap extends BaseTemplateTap<Properties, OutputStream>
053      {
054      /**
055       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
056       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
057       *
058       * @param parent       of type Tap
059       * @param pathTemplate of type String
060       */
061      @ConstructorProperties({"parent", "pathTemplate"})
062      public TemplateTap( FileTap parent, String pathTemplate )
063        {
064        this( parent, pathTemplate, OPEN_TAPS_THRESHOLD_DEFAULT );
065        }
066    
067      /**
068       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
069       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
070       * <p/>
071       * {@code openTapsThreshold} limits the number of open files to be output to.
072       *
073       * @param parent            of type Hfs
074       * @param pathTemplate      of type String
075       * @param openTapsThreshold of type int
076       */
077      @ConstructorProperties({"parent", "pathTemplate", "openTapsThreshold"})
078      public TemplateTap( FileTap parent, String pathTemplate, int openTapsThreshold )
079        {
080        super( parent, pathTemplate, openTapsThreshold );
081        }
082    
083      /**
084       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
085       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
086       *
087       * @param parent       of type Tap
088       * @param pathTemplate of type String
089       * @param sinkMode     of type SinkMode
090       */
091      @ConstructorProperties({"parent", "pathTemplate", "sinkMode"})
092      public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode )
093        {
094        super( parent, pathTemplate, sinkMode );
095        }
096    
097      /**
098       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
099       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
100       * <p/>
101       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
102       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
103       *
104       * @param parent             of type Tap
105       * @param pathTemplate       of type String
106       * @param sinkMode           of type SinkMode
107       * @param keepParentOnDelete of type boolean
108       */
109      @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete"})
110      public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete )
111        {
112        this( parent, pathTemplate, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT );
113        }
114    
115      /**
116       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
117       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
118       * <p/>
119       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
120       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
121       * <p/>
122       * {@code openTapsThreshold} limits the number of open files to be output to.
123       *
124       * @param parent             of type Tap
125       * @param pathTemplate       of type String
126       * @param sinkMode           of type SinkMode
127       * @param keepParentOnDelete of type boolean
128       * @param openTapsThreshold  of type int
129       */
130      @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete", "openTapsThreshold"})
131      public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold )
132        {
133        super( parent, pathTemplate, sinkMode, keepParentOnDelete, openTapsThreshold );
134        }
135    
136      /**
137       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
138       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
139       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
140       * <p/>
141       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
142       * data not in the result file to be used in the template path name.
143       *
144       * @param parent       of type Tap
145       * @param pathTemplate of type String
146       * @param pathFields   of type Fields
147       */
148      @ConstructorProperties({"parent", "pathTemplate", "pathFields"})
149      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields )
150        {
151        this( parent, pathTemplate, pathFields, OPEN_TAPS_THRESHOLD_DEFAULT );
152        }
153    
154      /**
155       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
156       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
157       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
158       * <p/>
159       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
160       * data not in the result file to be used in the template path name.
161       * <p/>
162       * {@code openTapsThreshold} limits the number of open files to be output to.
163       *
164       * @param parent            of type Hfs
165       * @param pathTemplate      of type String
166       * @param pathFields        of type Fields
167       * @param openTapsThreshold of type int
168       */
169      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "openTapsThreshold"})
170      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, int openTapsThreshold )
171        {
172        super( parent, pathTemplate, pathFields, openTapsThreshold );
173        }
174    
175      /**
176       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
177       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
178       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
179       * <p/>
180       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
181       * data not in the result file to be used in the template path name.
182       *
183       * @param parent       of type Tap
184       * @param pathTemplate of type String
185       * @param pathFields   of type Fields
186       * @param sinkMode     of type SinkMode
187       */
188      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode"})
189      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode )
190        {
191        super( parent, pathTemplate, pathFields, sinkMode );
192        }
193    
194      /**
195       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
196       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
197       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
198       * <p/>
199       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
200       * data not in the result file to be used in the template path name.
201       * <p/>
202       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
203       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
204       *
205       * @param parent             of type Tap
206       * @param pathTemplate       of type String
207       * @param pathFields         of type Fields
208       * @param sinkMode           of type SinkMode
209       * @param keepParentOnDelete of type boolean
210       */
211      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete"})
212      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete )
213        {
214        this( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT );
215        }
216    
217      /**
218       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
219       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
220       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
221       * <p/>
222       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
223       * data not in the result file to be used in the template path name.
224       * <p/>
225       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
226       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
227       * <p/>
228       * {@code openTapsThreshold} limits the number of open files to be output to.
229       *
230       * @param parent             of type Hfs
231       * @param pathTemplate       of type String
232       * @param pathFields         of type Fields
233       * @param sinkMode           of type SinkMode
234       * @param keepParentOnDelete of type boolean
235       * @param openTapsThreshold  of type int
236       */
237      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete",
238                              "openTapsThreshold"})
239      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold )
240        {
241        super( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, openTapsThreshold );
242        }
243    
244      @Override
245      protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<Properties> flowProcess, Tap parent, String path ) throws IOException
246        {
247        TapFileOutputStream output = new TapFileOutputStream( parent, path, isUpdate() ); // append if we are in update mode
248    
249        return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, parent, output );
250        }
251      }