001 /* 002 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.tap.local; 022 023 import java.beans.ConstructorProperties; 024 import java.io.IOException; 025 import java.io.OutputStream; 026 import java.util.Properties; 027 028 import cascading.flow.FlowProcess; 029 import cascading.tap.BaseTemplateTap; 030 import cascading.tap.SinkMode; 031 import cascading.tap.Tap; 032 import cascading.tap.local.io.TapFileOutputStream; 033 import cascading.tuple.Fields; 034 import cascading.tuple.TupleEntrySchemeCollector; 035 036 /** 037 * Class TemplateTap can be used to write tuple streams out to files and sub-directories based on the values in the {@link cascading.tuple.Tuple} 038 * instance. 039 * <p/> 040 * The constructor takes a {@link FileTap} {@link cascading.tap.Tap} and a {@link java.util.Formatter} format syntax String. This allows 041 * Tuple values at given positions to be used as directory names. 042 * <p/> 043 * {@code openTapsThreshold} limits the number of open files to be output to. This value defaults to 300 files. 044 * Each time the threshold is exceeded, 10% of the least recently used open files will be closed. 045 * <p/> 046 * TemplateTap will populate a given {@code pathTemplate} without regard to case of the values being used. Thus 047 * the resulting paths {@code 2012/June/} and {@code 2012/june/} will likely result in two open files into the same 048 * location. Forcing the case to be consistent with an upstream {@link cascading.operation.Function} is recommended, see 049 * {@link cascading.operation.expression.ExpressionFunction}. 050 */ 051 @Deprecated 052 public class TemplateTap extends BaseTemplateTap<Properties, OutputStream> 053 { 054 /** 055 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 056 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 057 * 058 * @param parent of type Tap 059 * @param pathTemplate of type String 060 */ 061 @ConstructorProperties({"parent", "pathTemplate"}) 062 public TemplateTap( FileTap parent, String pathTemplate ) 063 { 064 this( parent, pathTemplate, OPEN_TAPS_THRESHOLD_DEFAULT ); 065 } 066 067 /** 068 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 069 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 070 * <p/> 071 * {@code openTapsThreshold} limits the number of open files to be output to. 072 * 073 * @param parent of type Hfs 074 * @param pathTemplate of type String 075 * @param openTapsThreshold of type int 076 */ 077 @ConstructorProperties({"parent", "pathTemplate", "openTapsThreshold"}) 078 public TemplateTap( FileTap parent, String pathTemplate, int openTapsThreshold ) 079 { 080 super( parent, pathTemplate, openTapsThreshold ); 081 } 082 083 /** 084 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 085 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 086 * 087 * @param parent of type Tap 088 * @param pathTemplate of type String 089 * @param sinkMode of type SinkMode 090 */ 091 @ConstructorProperties({"parent", "pathTemplate", "sinkMode"}) 092 public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode ) 093 { 094 super( parent, pathTemplate, sinkMode ); 095 } 096 097 /** 098 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 099 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 100 * <p/> 101 * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)} 102 * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}. 103 * 104 * @param parent of type Tap 105 * @param pathTemplate of type String 106 * @param sinkMode of type SinkMode 107 * @param keepParentOnDelete of type boolean 108 */ 109 @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete"}) 110 public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete ) 111 { 112 this( parent, pathTemplate, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT ); 113 } 114 115 /** 116 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 117 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 118 * <p/> 119 * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)} 120 * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}. 121 * <p/> 122 * {@code openTapsThreshold} limits the number of open files to be output to. 123 * 124 * @param parent of type Tap 125 * @param pathTemplate of type String 126 * @param sinkMode of type SinkMode 127 * @param keepParentOnDelete of type boolean 128 * @param openTapsThreshold of type int 129 */ 130 @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete", "openTapsThreshold"}) 131 public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold ) 132 { 133 super( parent, pathTemplate, sinkMode, keepParentOnDelete, openTapsThreshold ); 134 } 135 136 /** 137 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 138 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 139 * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate. 140 * <p/> 141 * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing 142 * data not in the result file to be used in the template path name. 143 * 144 * @param parent of type Tap 145 * @param pathTemplate of type String 146 * @param pathFields of type Fields 147 */ 148 @ConstructorProperties({"parent", "pathTemplate", "pathFields"}) 149 public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields ) 150 { 151 this( parent, pathTemplate, pathFields, OPEN_TAPS_THRESHOLD_DEFAULT ); 152 } 153 154 /** 155 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 156 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 157 * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate. 158 * <p/> 159 * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing 160 * data not in the result file to be used in the template path name. 161 * <p/> 162 * {@code openTapsThreshold} limits the number of open files to be output to. 163 * 164 * @param parent of type Hfs 165 * @param pathTemplate of type String 166 * @param pathFields of type Fields 167 * @param openTapsThreshold of type int 168 */ 169 @ConstructorProperties({"parent", "pathTemplate", "pathFields", "openTapsThreshold"}) 170 public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, int openTapsThreshold ) 171 { 172 super( parent, pathTemplate, pathFields, openTapsThreshold ); 173 } 174 175 /** 176 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 177 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 178 * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate. 179 * <p/> 180 * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing 181 * data not in the result file to be used in the template path name. 182 * 183 * @param parent of type Tap 184 * @param pathTemplate of type String 185 * @param pathFields of type Fields 186 * @param sinkMode of type SinkMode 187 */ 188 @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode"}) 189 public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode ) 190 { 191 super( parent, pathTemplate, pathFields, sinkMode ); 192 } 193 194 /** 195 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 196 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 197 * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate. 198 * <p/> 199 * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing 200 * data not in the result file to be used in the template path name. 201 * <p/> 202 * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)} 203 * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}. 204 * 205 * @param parent of type Tap 206 * @param pathTemplate of type String 207 * @param pathFields of type Fields 208 * @param sinkMode of type SinkMode 209 * @param keepParentOnDelete of type boolean 210 */ 211 @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete"}) 212 public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete ) 213 { 214 this( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT ); 215 } 216 217 /** 218 * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the 219 * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String. 220 * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate. 221 * <p/> 222 * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing 223 * data not in the result file to be used in the template path name. 224 * <p/> 225 * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)} 226 * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}. 227 * <p/> 228 * {@code openTapsThreshold} limits the number of open files to be output to. 229 * 230 * @param parent of type Hfs 231 * @param pathTemplate of type String 232 * @param pathFields of type Fields 233 * @param sinkMode of type SinkMode 234 * @param keepParentOnDelete of type boolean 235 * @param openTapsThreshold of type int 236 */ 237 @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete", 238 "openTapsThreshold"}) 239 public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold ) 240 { 241 super( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, openTapsThreshold ); 242 } 243 244 @Override 245 protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<Properties> flowProcess, Tap parent, String path ) throws IOException 246 { 247 TapFileOutputStream output = new TapFileOutputStream( parent, path, isUpdate() ); // append if we are in update mode 248 249 return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, parent, output ); 250 } 251 }