001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.scheme.local;
022    
023    import java.beans.ConstructorProperties;
024    import java.io.IOException;
025    import java.io.InputStream;
026    import java.io.InputStreamReader;
027    import java.io.LineNumberReader;
028    import java.io.OutputStream;
029    import java.io.OutputStreamWriter;
030    import java.io.PrintWriter;
031    import java.io.UnsupportedEncodingException;
032    import java.nio.charset.Charset;
033    import java.util.Properties;
034    
035    import cascading.flow.FlowProcess;
036    import cascading.scheme.Scheme;
037    import cascading.scheme.SinkCall;
038    import cascading.scheme.SourceCall;
039    import cascading.scheme.util.DelimitedParser;
040    import cascading.tap.CompositeTap;
041    import cascading.tap.Tap;
042    import cascading.tap.TapException;
043    import cascading.tap.local.FileTap;
044    import cascading.tuple.Fields;
045    import cascading.tuple.Tuple;
046    import cascading.tuple.TupleEntry;
047    import cascading.tuple.util.TupleViews;
048    
049    /**
050     * Class TextDelimited provides direct support for delimited text files, like
051     * TAB (\t) or COMMA (,) delimited files. It also optionally allows for quoted values.
052     * <p/>
053     * TextDelimited may also be used to skip the "header" in a file, where the header is defined as the very first line
054     * in every input file. That is, if the byte offset of the current line from the input is zero (0), that line will
055     * be skipped.
056     * <p/>
057     * It is assumed if sink/source {@code fields} is set to either {@link Fields#ALL} or {@link Fields#UNKNOWN} and
058     * {@code skipHeader} or {@code hasHeader} is {@code true}, the field names will be retrieved from the header of the
059     * file and used during planning. The header will parsed with the same rules as the body of the file.
060     * <p/>
061     * By default headers are not skipped.
062     * <p/>
063     * TextDelimited may also be used to write a "header" in a file. The fields names for the header are taken directly
064     * from the declared fields. Or if the declared fields are {@link Fields#ALL} or {@link Fields#UNKNOWN}, the
065     * resolved field names will be used, if any.
066     * <p/>
067     * By default headers are not written.
068     * <p/>
069     * If {@code hasHeaders} is set to {@code true} on a constructor, both {@code skipHeader} and {@code writeHeader} will
070     * be set to {@code true}.
071     * <p/>
072     * By default this {@link cascading.scheme.Scheme} is both {@code strict} and {@code safe}.
073     * <p/>
074     * Strict meaning if a line of text does not parse into the expected number of fields, this class will throw a
075     * {@link TapException}. If strict is {@code false}, then {@link Tuple} will be returned with {@code null} values
076     * for the missing fields.
077     * <p/>
078     * Safe meaning if a field cannot be coerced into an expected type, a {@code null} will be used for the value.
079     * If safe is {@code false}, a {@link TapException} will be thrown.
080     * <p/>
081     * Also by default, {@code quote} strings are not searched for to improve processing speed. If a file is
082     * COMMA delimited but may have COMMA's in a value, the whole value should be surrounded by the quote string, typically
083     * double quotes ({@literal "}).
084     * <p/>
085     * Note all empty fields in a line will be returned as {@code null} unless coerced into a new type.
086     * <p/>
087     * This Scheme may source/sink {@link Fields#ALL}, when given on the constructor the new instance will automatically
088     * default to strict == false as the number of fields parsed are arbitrary or unknown. A type array may not be given
089     * either, so all values will be returned as Strings.
090     * <p/>
091     * By default, all text is encoded/decoded as UTF-8. This can be changed via the {@code charsetName} constructor
092     * argument.
093     * <p/>
094     * To override field and line parsing behaviors, sub-class {@link DelimitedParser} or provide a
095     * {@link cascading.scheme.util.FieldTypeResolver} implementation.
096     * <p/>
097     * Note that there should be no expectation that TextDelimited, or specifically {@link DelimitedParser}, can handle
098     * all delimited and quoted combinations reliably. Attempting to do so would impair its performance and maintainability.
099     * <p/>
100     * Further, it can be safely said any corrupted files will not be supported for obvious reasons. Corrupted files may
101     * result in exceptions or could cause edge cases in the underlying java regular expression engine.
102     * <p/>
103     * A large part of Cascading was designed to help users cleans data. Thus the recommendation is to create Flows that
104     * are responsible for cleansing large data-sets when faced with the problem
105     * <p/>
106     * DelimitedParser maybe sub-classed and extended if necessary.
107     *
108     * @see TextLine
109     */
110    public class TextDelimited extends Scheme<Properties, InputStream, OutputStream, LineNumberReader, PrintWriter>
111      {
112      public static final String DEFAULT_CHARSET = "UTF-8";
113    
114      private final boolean skipHeader;
115      private final boolean writeHeader;
116      private final DelimitedParser delimitedParser;
117      private String charsetName = DEFAULT_CHARSET;
118    
119      /**
120       * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking
121       * {@link Fields#ALL} and using TAB as the default delimiter.
122       * <p/>
123       * Use this constructor if the source and sink fields will be resolved during planning, for example, when using
124       * with a {@link cascading.pipe.Checkpoint} Tap.
125       */
126      public TextDelimited()
127        {
128        this( Fields.ALL );
129        }
130    
131      /**
132       * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking
133       * {@link Fields#ALL} and using TAB as the default delimiter.
134       * <p/>
135       * Use this constructor if the source and sink fields will be resolved during planning, for example, when using
136       * with a {@link cascading.pipe.Checkpoint} Tap.
137       *
138       * @param hasHeader
139       * @param delimiter
140       */
141      @ConstructorProperties({"hasHeader", "delimiter"})
142      public TextDelimited( boolean hasHeader, String delimiter )
143        {
144        this( Fields.ALL, hasHeader, delimiter, null, (Class[]) null );
145        }
146    
147      /**
148       * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking
149       * {@link Fields#ALL} and using TAB as the default delimiter.
150       * <p/>
151       * Use this constructor if the source and sink fields will be resolved during planning, for example, when using
152       * with a {@link cascading.pipe.Checkpoint} Tap.
153       *
154       * @param hasHeader
155       * @param delimiter
156       * @param quote
157       */
158      @ConstructorProperties({"hasHeader", "delimiter", "quote"})
159      public TextDelimited( boolean hasHeader, String delimiter, String quote )
160        {
161        this( Fields.ALL, hasHeader, delimiter, quote, (Class[]) null );
162        }
163    
164      /**
165       * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking
166       * {@link Fields#ALL} and using the given delimitedParser instance for parsing.
167       * <p/>
168       * Use this constructor if the source and sink fields will be resolved during planning, for example, when using
169       * with a {@link cascading.pipe.Checkpoint} Tap.
170       *
171       * @param hasHeader
172       * @param delimitedParser
173       */
174      @ConstructorProperties({"hasHeader", "delimitedParser"})
175      public TextDelimited( boolean hasHeader, DelimitedParser delimitedParser )
176        {
177        this( Fields.ALL, hasHeader, hasHeader, delimitedParser );
178        }
179    
180      /**
181       * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking
182       * {@link Fields#ALL} and using the given delimitedParser instance for parsing.
183       * <p/>
184       * Use this constructor if the source and sink fields will be resolved during planning, for example, when using
185       * with a {@link cascading.pipe.Checkpoint} Tap.
186       * <p/>
187       * This constructor will set {@code skipHeader} and {@code writeHeader} values to true.
188       *
189       * @param delimitedParser
190       */
191      @ConstructorProperties({"delimitedParser"})
192      public TextDelimited( DelimitedParser delimitedParser )
193        {
194        this( Fields.ALL, true, true, delimitedParser );
195        }
196    
197      /**
198       * Constructor TextDelimited creates a new TextDelimited instance with TAB as the default delimiter.
199       *
200       * @param fields of type Fields
201       */
202      @ConstructorProperties({"fields"})
203      public TextDelimited( Fields fields )
204        {
205        this( fields, "\t", null, null );
206        }
207    
208      /**
209       * Constructor TextDelimited creates a new TextDelimited instance.
210       *
211       * @param fields    of type Fields
212       * @param delimiter of type String
213       */
214      @ConstructorProperties({"fields", "delimiter"})
215      public TextDelimited( Fields fields, String delimiter )
216        {
217        this( fields, delimiter, null, null );
218        }
219    
220      /**
221       * Constructor TextDelimited creates a new TextDelimited instance.
222       *
223       * @param fields    of type Fields
224       * @param hasHeader of type boolean
225       * @param delimiter of type String
226       */
227      @ConstructorProperties({"fields", "hasHeader", "delimiter"})
228      public TextDelimited( Fields fields, boolean hasHeader, String delimiter )
229        {
230        this( fields, hasHeader, hasHeader, delimiter, null, null );
231        }
232    
233      /**
234       * Constructor TextDelimited creates a new TextDelimited instance.
235       *
236       * @param fields     of type Fields
237       * @param skipHeader of type boolean
238       * @param delimiter  of type String
239       */
240      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter"})
241      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter )
242        {
243        this( fields, skipHeader, writeHeader, delimiter, null, null );
244        }
245    
246      /**
247       * Constructor TextDelimited creates a new TextDelimited instance.
248       *
249       * @param fields    of type Fields
250       * @param delimiter of type String
251       * @param types     of type Class[]
252       */
253      @ConstructorProperties({"fields", "delimiter", "types"})
254      public TextDelimited( Fields fields, String delimiter, Class[] types )
255        {
256        this( fields, delimiter, null, types );
257        }
258    
259      /**
260       * Constructor TextDelimited creates a new TextDelimited instance.
261       *
262       * @param fields    of type Fields
263       * @param hasHeader of type boolean
264       * @param delimiter of type String
265       * @param types     of type Class[]
266       */
267      @ConstructorProperties({"fields", "hasHeader", "delimiter", "types"})
268      public TextDelimited( Fields fields, boolean hasHeader, String delimiter, Class[] types )
269        {
270        this( fields, hasHeader, hasHeader, delimiter, null, types );
271        }
272    
273      /**
274       * Constructor TextDelimited creates a new TextDelimited instance.
275       *
276       * @param fields      of type Fields
277       * @param skipHeader  of type boolean
278       * @param writeHeader of type boolean
279       * @param delimiter   of type String
280       * @param types       of type Class[]
281       */
282      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "types"})
283      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, Class[] types )
284        {
285        this( fields, skipHeader, writeHeader, delimiter, null, types );
286        }
287    
288      /**
289       * Constructor TextDelimited creates a new TextDelimited instance.
290       *
291       * @param fields    of type Fields
292       * @param delimiter of type String
293       * @param quote     of type String
294       * @param types     of type Class[]
295       */
296      @ConstructorProperties({"fields", "delimiter", "quote", "types"})
297      public TextDelimited( Fields fields, String delimiter, String quote, Class[] types )
298        {
299        this( fields, false, delimiter, quote, types );
300        }
301    
302      /**
303       * Constructor TextDelimited creates a new TextDelimited instance.
304       *
305       * @param fields    of type Fields
306       * @param hasHeader of type boolean
307       * @param delimiter of type String
308       * @param quote     of type String
309       * @param types     of type Class[]
310       */
311      @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "types"})
312      public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types )
313        {
314        this( fields, hasHeader, hasHeader, delimiter, quote, types, true );
315        }
316    
317      /**
318       * Constructor TextDelimited creates a new TextDelimited instance.
319       *
320       * @param fields      of type Fields
321       * @param skipHeader  of type boolean
322       * @param writeHeader of type boolean
323       * @param delimiter   of type String
324       * @param quote       of type String
325       * @param types       of type Class[]
326       */
327      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "quote", "types"})
328      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, String quote, Class[] types )
329        {
330        this( fields, skipHeader, writeHeader, delimiter, quote, types, true );
331        }
332    
333      /**
334       * Constructor TextDelimited creates a new TextDelimited instance.
335       *
336       * @param fields    of type Fields
337       * @param delimiter of type String
338       * @param quote     of type String
339       * @param types     of type Class[]
340       * @param safe      of type boolean
341       */
342      @ConstructorProperties({"fields", "delimiter", "quote", "types", "safe"})
343      public TextDelimited( Fields fields, String delimiter, String quote, Class[] types, boolean safe )
344        {
345        this( fields, false, delimiter, quote, types, safe );
346        }
347    
348      /**
349       * Constructor TextDelimited creates a new TextDelimited instance.
350       *
351       * @param fields    of type Fields
352       * @param hasHeader of type boolean
353       * @param delimiter of type String
354       * @param quote     of type String
355       * @param types     of type Class[]
356       * @param safe      of type boolean
357       */
358      @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "types", "safe"})
359      public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types, boolean safe )
360        {
361        this( fields, hasHeader, hasHeader, delimiter, true, quote, types, safe );
362        }
363    
364      /**
365       * Constructor TextDelimited creates a new TextDelimited instance.
366       *
367       * @param fields      of type Fields
368       * @param hasHeader   of type boolean
369       * @param delimiter   of type String
370       * @param quote       of type String
371       * @param types       of type Class[]
372       * @param safe        of type boolean
373       * @param charsetName of type String
374       */
375      @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "types", "safe", "charsetName"})
376      public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types, boolean safe, String charsetName )
377        {
378        this( fields, hasHeader, hasHeader, delimiter, true, quote, types, safe, charsetName );
379        }
380    
381      /**
382       * Constructor TextDelimited creates a new TextDelimited instance.
383       *
384       * @param fields      of type Fields
385       * @param skipHeader  of type boolean
386       * @param writeHeader of type boolean
387       * @param delimiter   of type String
388       * @param quote       of type String
389       * @param types       of type Class[]
390       * @param safe        of type boolean
391       */
392      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "quote", "types", "safe"})
393      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, String quote, Class[] types, boolean safe )
394        {
395        this( fields, skipHeader, writeHeader, delimiter, true, quote, types, safe );
396        }
397    
398      /**
399       * Constructor TextDelimited creates a new TextDelimited instance.
400       *
401       * @param fields    of type Fields
402       * @param delimiter of type String
403       * @param quote     of type String
404       */
405      @ConstructorProperties({"fields", "delimiter", "quote"})
406      public TextDelimited( Fields fields, String delimiter, String quote )
407        {
408        this( fields, false, delimiter, quote, null, true );
409        }
410    
411      /**
412       * Constructor TextDelimited creates a new TextDelimited instance.
413       *
414       * @param fields    of type Fields
415       * @param hasHeader of type boolean
416       * @param delimiter of type String
417       * @param quote     of type String
418       */
419      @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote"})
420      public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote )
421        {
422        this( fields, hasHeader, delimiter, quote, null, true );
423        }
424    
425      /**
426       * Constructor TextDelimited creates a new TextDelimited instance.
427       *
428       * @param fields      of type Fields
429       * @param hasHeader   of type boolean
430       * @param delimiter   of type String
431       * @param quote       of type String
432       * @param charsetName of type String
433       */
434      @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "charsetName"})
435      public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, String charsetName )
436        {
437        this( fields, hasHeader, delimiter, quote, null, true, charsetName );
438        }
439    
440      /**
441       * Constructor TextDelimited creates a new TextDelimited instance.
442       *
443       * @param fields      of type Fields
444       * @param skipHeader  of type boolean
445       * @param writeHeader of type boolean
446       * @param delimiter   of type String
447       * @param strict      of type boolean
448       * @param quote       of type String
449       * @param types       of type Class[]
450       * @param safe        of type boolean
451       */
452      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "strict", "quote", "types", "safe"})
453      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, boolean strict, String quote, Class[] types, boolean safe )
454        {
455        this( fields, skipHeader, writeHeader, delimiter, strict, quote, types, safe, DEFAULT_CHARSET );
456        }
457    
458      /**
459       * Constructor TextDelimited creates a new TextDelimited instance.
460       *
461       * @param fields      of type Fields
462       * @param skipHeader  of type boolean
463       * @param writeHeader of type boolean
464       * @param delimiter   of type String
465       * @param strict      of type boolean
466       * @param quote       of type String
467       * @param types       of type Class[]
468       * @param safe        of type boolean
469       * @param charsetName of type String
470       */
471      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "strict", "quote", "types", "safe",
472                              "charsetName"})
473      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, boolean strict, String quote, Class[] types, boolean safe, String charsetName )
474        {
475        this( fields, skipHeader, writeHeader, charsetName, new DelimitedParser( delimiter, quote, types, strict, safe ) );
476        }
477    
478      /**
479       * Constructor TextDelimited creates a new TextDelimited instance.
480       *
481       * @param fields          of type Fields
482       * @param writeHeader     of type boolean
483       * @param delimitedParser of type DelimitedParser
484       */
485      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimitedParser"})
486      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, DelimitedParser delimitedParser )
487        {
488        this( fields, skipHeader, writeHeader, null, delimitedParser );
489        }
490    
491      /**
492       * Constructor TextDelimited creates a new TextDelimited instance.
493       *
494       * @param fields          of type Fields
495       * @param hasHeader       of type boolean
496       * @param delimitedParser of type DelimitedParser
497       */
498      @ConstructorProperties({"fields", "hasHeader", "delimitedParser"})
499      public TextDelimited( Fields fields, boolean hasHeader, DelimitedParser delimitedParser )
500        {
501        this( fields, hasHeader, hasHeader, null, delimitedParser );
502        }
503    
504      /**
505       * Constructor TextDelimited creates a new TextDelimited instance.
506       *
507       * @param fields          of type Fields
508       * @param writeHeader     of type boolean
509       * @param charsetName     of type String
510       * @param delimitedParser of type DelimitedParser
511       */
512      @ConstructorProperties({"fields", "skipHeader", "writeHeader", "charsetName", "delimitedParser"})
513      public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String charsetName, DelimitedParser delimitedParser )
514        {
515        super( fields, fields );
516    
517        this.delimitedParser = delimitedParser;
518    
519        // normalizes ALL and UNKNOWN
520        // calls reset on delimitedParser
521        setSourceFields( fields );
522        setSinkFields( fields );
523    
524        this.skipHeader = skipHeader;
525        this.writeHeader = writeHeader;
526    
527        if( charsetName != null )
528          this.charsetName = charsetName;
529    
530        // throws an exception if not found
531        Charset.forName( this.charsetName );
532        }
533    
534      /**
535       * Method getDelimiter returns the delimiter used to parse fields from the current line of text.
536       *
537       * @return a String
538       */
539      public String getDelimiter()
540        {
541        return delimitedParser.getDelimiter();
542        }
543    
544      /**
545       * Method getQuote returns the quote string, if any, used to encapsulate each field in a line to delimited text.
546       *
547       * @return a String
548       */
549      public String getQuote()
550        {
551        return delimitedParser.getQuote();
552        }
553    
554      public LineNumberReader createInput( InputStream inputStream )
555        {
556        try
557          {
558          return new LineNumberReader( new InputStreamReader( inputStream, charsetName ) );
559          }
560        catch( UnsupportedEncodingException exception )
561          {
562          throw new TapException( exception );
563          }
564        }
565    
566      public PrintWriter createOutput( OutputStream outputStream )
567        {
568        try
569          {
570          return new PrintWriter( new OutputStreamWriter( outputStream, charsetName ) );
571          }
572        catch( UnsupportedEncodingException exception )
573          {
574          throw new TapException( exception );
575          }
576        }
577    
578      @Override
579      public void setSinkFields( Fields sinkFields )
580        {
581        super.setSourceFields( sinkFields );
582        super.setSinkFields( sinkFields );
583    
584        if( delimitedParser != null )
585          delimitedParser.reset( getSourceFields(), getSinkFields() );
586        }
587    
588      @Override
589      public void setSourceFields( Fields sourceFields )
590        {
591        super.setSourceFields( sourceFields );
592        super.setSinkFields( sourceFields );
593    
594        if( delimitedParser != null )
595          delimitedParser.reset( getSourceFields(), getSinkFields() );
596        }
597    
598      @Override
599      public boolean isSymmetrical()
600        {
601        return super.isSymmetrical() && skipHeader == writeHeader;
602        }
603    
604      @Override
605      public Fields retrieveSourceFields( FlowProcess<Properties> process, Tap tap )
606        {
607        if( !skipHeader || !getSourceFields().isUnknown() )
608          return getSourceFields();
609    
610        // no need to open them all
611        if( tap instanceof CompositeTap )
612          tap = (Tap) ( (CompositeTap) tap ).getChildTaps().next();
613    
614        tap = new FileTap( new TextLine( new Fields( "line" ), charsetName ), tap.getIdentifier() );
615    
616        setSourceFields( delimitedParser.parseFirstLine( process, tap ) );
617    
618        return getSourceFields();
619        }
620    
621      @Override
622      public void presentSourceFields( FlowProcess<Properties> process, Tap tap, Fields fields )
623        {
624        // do nothing
625        }
626    
627      @Override
628      public void presentSinkFields( FlowProcess<Properties> flowProcess, Tap tap, Fields fields )
629        {
630        if( writeHeader )
631          presentSinkFieldsInternal( fields );
632        }
633    
634      @Override
635      public void sourceConfInit( FlowProcess<Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf )
636        {
637        }
638    
639      @Override
640      public void sourcePrepare( FlowProcess<Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException
641        {
642        sourceCall.setContext( createInput( sourceCall.getInput() ) );
643    
644        sourceCall.getIncomingEntry().setTuple( TupleViews.createObjectArray() );
645        }
646    
647      @Override
648      public boolean source( FlowProcess<Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException
649        {
650        String line = sourceCall.getContext().readLine();
651    
652        if( line == null )
653          return false;
654    
655        if( skipHeader && sourceCall.getContext().getLineNumber() == 1 ) // todo: optimize this away
656          line = sourceCall.getContext().readLine();
657    
658        if( line == null )
659          return false;
660    
661        Object[] split = delimitedParser.parseLine( line );
662    
663        // assumption it is better to re-use than to construct new
664        Tuple tuple = sourceCall.getIncomingEntry().getTuple();
665    
666        TupleViews.reset( tuple, split );
667    
668        return true;
669        }
670    
671      @Override
672      public void sourceCleanup( FlowProcess<Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException
673        {
674        sourceCall.setContext( null );
675        }
676    
677      @Override
678      public void sinkConfInit( FlowProcess<Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf )
679        {
680        }
681    
682      @Override
683      public void sinkPrepare( FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall )
684        {
685        sinkCall.setContext( createOutput( sinkCall.getOutput() ) );
686    
687        if( writeHeader )
688          {
689          Fields fields = sinkCall.getOutgoingEntry().getFields();
690          delimitedParser.joinFirstLine( fields, sinkCall.getContext() );
691    
692          sinkCall.getContext().println();
693          }
694        }
695    
696      @Override
697      public void sink( FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) throws IOException
698        {
699        TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
700    
701        Iterable<String> strings = tupleEntry.asIterableOf( String.class );
702    
703        delimitedParser.joinLine( strings, sinkCall.getContext() );
704    
705        sinkCall.getContext().println();
706        }
707    
708      @Override
709      public void sinkCleanup( FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall )
710        {
711        sinkCall.getContext().flush();
712        sinkCall.setContext( null );
713        }
714      }