001 /* 002 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.scheme.local; 022 023 import java.beans.ConstructorProperties; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.io.InputStreamReader; 027 import java.io.LineNumberReader; 028 import java.io.OutputStream; 029 import java.io.OutputStreamWriter; 030 import java.io.PrintWriter; 031 import java.io.UnsupportedEncodingException; 032 import java.nio.charset.Charset; 033 import java.util.Properties; 034 035 import cascading.flow.FlowProcess; 036 import cascading.scheme.Scheme; 037 import cascading.scheme.SinkCall; 038 import cascading.scheme.SourceCall; 039 import cascading.scheme.util.DelimitedParser; 040 import cascading.tap.CompositeTap; 041 import cascading.tap.Tap; 042 import cascading.tap.TapException; 043 import cascading.tap.local.FileTap; 044 import cascading.tuple.Fields; 045 import cascading.tuple.Tuple; 046 import cascading.tuple.TupleEntry; 047 import cascading.tuple.util.TupleViews; 048 049 /** 050 * Class TextDelimited provides direct support for delimited text files, like 051 * TAB (\t) or COMMA (,) delimited files. It also optionally allows for quoted values. 052 * <p/> 053 * TextDelimited may also be used to skip the "header" in a file, where the header is defined as the very first line 054 * in every input file. That is, if the byte offset of the current line from the input is zero (0), that line will 055 * be skipped. 056 * <p/> 057 * It is assumed if sink/source {@code fields} is set to either {@link Fields#ALL} or {@link Fields#UNKNOWN} and 058 * {@code skipHeader} or {@code hasHeader} is {@code true}, the field names will be retrieved from the header of the 059 * file and used during planning. The header will parsed with the same rules as the body of the file. 060 * <p/> 061 * By default headers are not skipped. 062 * <p/> 063 * TextDelimited may also be used to write a "header" in a file. The fields names for the header are taken directly 064 * from the declared fields. Or if the declared fields are {@link Fields#ALL} or {@link Fields#UNKNOWN}, the 065 * resolved field names will be used, if any. 066 * <p/> 067 * By default headers are not written. 068 * <p/> 069 * If {@code hasHeaders} is set to {@code true} on a constructor, both {@code skipHeader} and {@code writeHeader} will 070 * be set to {@code true}. 071 * <p/> 072 * By default this {@link cascading.scheme.Scheme} is both {@code strict} and {@code safe}. 073 * <p/> 074 * Strict meaning if a line of text does not parse into the expected number of fields, this class will throw a 075 * {@link TapException}. If strict is {@code false}, then {@link Tuple} will be returned with {@code null} values 076 * for the missing fields. 077 * <p/> 078 * Safe meaning if a field cannot be coerced into an expected type, a {@code null} will be used for the value. 079 * If safe is {@code false}, a {@link TapException} will be thrown. 080 * <p/> 081 * Also by default, {@code quote} strings are not searched for to improve processing speed. If a file is 082 * COMMA delimited but may have COMMA's in a value, the whole value should be surrounded by the quote string, typically 083 * double quotes ({@literal "}). 084 * <p/> 085 * Note all empty fields in a line will be returned as {@code null} unless coerced into a new type. 086 * <p/> 087 * This Scheme may source/sink {@link Fields#ALL}, when given on the constructor the new instance will automatically 088 * default to strict == false as the number of fields parsed are arbitrary or unknown. A type array may not be given 089 * either, so all values will be returned as Strings. 090 * <p/> 091 * By default, all text is encoded/decoded as UTF-8. This can be changed via the {@code charsetName} constructor 092 * argument. 093 * <p/> 094 * To override field and line parsing behaviors, sub-class {@link DelimitedParser} or provide a 095 * {@link cascading.scheme.util.FieldTypeResolver} implementation. 096 * <p/> 097 * Note that there should be no expectation that TextDelimited, or specifically {@link DelimitedParser}, can handle 098 * all delimited and quoted combinations reliably. Attempting to do so would impair its performance and maintainability. 099 * <p/> 100 * Further, it can be safely said any corrupted files will not be supported for obvious reasons. Corrupted files may 101 * result in exceptions or could cause edge cases in the underlying java regular expression engine. 102 * <p/> 103 * A large part of Cascading was designed to help users cleans data. Thus the recommendation is to create Flows that 104 * are responsible for cleansing large data-sets when faced with the problem 105 * <p/> 106 * DelimitedParser maybe sub-classed and extended if necessary. 107 * 108 * @see TextLine 109 */ 110 public class TextDelimited extends Scheme<Properties, InputStream, OutputStream, LineNumberReader, PrintWriter> 111 { 112 public static final String DEFAULT_CHARSET = "UTF-8"; 113 114 private final boolean skipHeader; 115 private final boolean writeHeader; 116 private final DelimitedParser delimitedParser; 117 private String charsetName = DEFAULT_CHARSET; 118 119 /** 120 * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking 121 * {@link Fields#ALL} and using TAB as the default delimiter. 122 * <p/> 123 * Use this constructor if the source and sink fields will be resolved during planning, for example, when using 124 * with a {@link cascading.pipe.Checkpoint} Tap. 125 */ 126 public TextDelimited() 127 { 128 this( Fields.ALL ); 129 } 130 131 /** 132 * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking 133 * {@link Fields#ALL} and using TAB as the default delimiter. 134 * <p/> 135 * Use this constructor if the source and sink fields will be resolved during planning, for example, when using 136 * with a {@link cascading.pipe.Checkpoint} Tap. 137 * 138 * @param hasHeader 139 * @param delimiter 140 */ 141 @ConstructorProperties({"hasHeader", "delimiter"}) 142 public TextDelimited( boolean hasHeader, String delimiter ) 143 { 144 this( Fields.ALL, hasHeader, delimiter, null, (Class[]) null ); 145 } 146 147 /** 148 * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking 149 * {@link Fields#ALL} and using TAB as the default delimiter. 150 * <p/> 151 * Use this constructor if the source and sink fields will be resolved during planning, for example, when using 152 * with a {@link cascading.pipe.Checkpoint} Tap. 153 * 154 * @param hasHeader 155 * @param delimiter 156 * @param quote 157 */ 158 @ConstructorProperties({"hasHeader", "delimiter", "quote"}) 159 public TextDelimited( boolean hasHeader, String delimiter, String quote ) 160 { 161 this( Fields.ALL, hasHeader, delimiter, quote, (Class[]) null ); 162 } 163 164 /** 165 * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking 166 * {@link Fields#ALL} and using the given delimitedParser instance for parsing. 167 * <p/> 168 * Use this constructor if the source and sink fields will be resolved during planning, for example, when using 169 * with a {@link cascading.pipe.Checkpoint} Tap. 170 * 171 * @param hasHeader 172 * @param delimitedParser 173 */ 174 @ConstructorProperties({"hasHeader", "delimitedParser"}) 175 public TextDelimited( boolean hasHeader, DelimitedParser delimitedParser ) 176 { 177 this( Fields.ALL, hasHeader, hasHeader, delimitedParser ); 178 } 179 180 /** 181 * Constructor TextDelimited creates a new TextDelimited instance sourcing {@link Fields#UNKNOWN}, sinking 182 * {@link Fields#ALL} and using the given delimitedParser instance for parsing. 183 * <p/> 184 * Use this constructor if the source and sink fields will be resolved during planning, for example, when using 185 * with a {@link cascading.pipe.Checkpoint} Tap. 186 * <p/> 187 * This constructor will set {@code skipHeader} and {@code writeHeader} values to true. 188 * 189 * @param delimitedParser 190 */ 191 @ConstructorProperties({"delimitedParser"}) 192 public TextDelimited( DelimitedParser delimitedParser ) 193 { 194 this( Fields.ALL, true, true, delimitedParser ); 195 } 196 197 /** 198 * Constructor TextDelimited creates a new TextDelimited instance with TAB as the default delimiter. 199 * 200 * @param fields of type Fields 201 */ 202 @ConstructorProperties({"fields"}) 203 public TextDelimited( Fields fields ) 204 { 205 this( fields, "\t", null, null ); 206 } 207 208 /** 209 * Constructor TextDelimited creates a new TextDelimited instance. 210 * 211 * @param fields of type Fields 212 * @param delimiter of type String 213 */ 214 @ConstructorProperties({"fields", "delimiter"}) 215 public TextDelimited( Fields fields, String delimiter ) 216 { 217 this( fields, delimiter, null, null ); 218 } 219 220 /** 221 * Constructor TextDelimited creates a new TextDelimited instance. 222 * 223 * @param fields of type Fields 224 * @param hasHeader of type boolean 225 * @param delimiter of type String 226 */ 227 @ConstructorProperties({"fields", "hasHeader", "delimiter"}) 228 public TextDelimited( Fields fields, boolean hasHeader, String delimiter ) 229 { 230 this( fields, hasHeader, hasHeader, delimiter, null, null ); 231 } 232 233 /** 234 * Constructor TextDelimited creates a new TextDelimited instance. 235 * 236 * @param fields of type Fields 237 * @param skipHeader of type boolean 238 * @param delimiter of type String 239 */ 240 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter"}) 241 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter ) 242 { 243 this( fields, skipHeader, writeHeader, delimiter, null, null ); 244 } 245 246 /** 247 * Constructor TextDelimited creates a new TextDelimited instance. 248 * 249 * @param fields of type Fields 250 * @param delimiter of type String 251 * @param types of type Class[] 252 */ 253 @ConstructorProperties({"fields", "delimiter", "types"}) 254 public TextDelimited( Fields fields, String delimiter, Class[] types ) 255 { 256 this( fields, delimiter, null, types ); 257 } 258 259 /** 260 * Constructor TextDelimited creates a new TextDelimited instance. 261 * 262 * @param fields of type Fields 263 * @param hasHeader of type boolean 264 * @param delimiter of type String 265 * @param types of type Class[] 266 */ 267 @ConstructorProperties({"fields", "hasHeader", "delimiter", "types"}) 268 public TextDelimited( Fields fields, boolean hasHeader, String delimiter, Class[] types ) 269 { 270 this( fields, hasHeader, hasHeader, delimiter, null, types ); 271 } 272 273 /** 274 * Constructor TextDelimited creates a new TextDelimited instance. 275 * 276 * @param fields of type Fields 277 * @param skipHeader of type boolean 278 * @param writeHeader of type boolean 279 * @param delimiter of type String 280 * @param types of type Class[] 281 */ 282 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "types"}) 283 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, Class[] types ) 284 { 285 this( fields, skipHeader, writeHeader, delimiter, null, types ); 286 } 287 288 /** 289 * Constructor TextDelimited creates a new TextDelimited instance. 290 * 291 * @param fields of type Fields 292 * @param delimiter of type String 293 * @param quote of type String 294 * @param types of type Class[] 295 */ 296 @ConstructorProperties({"fields", "delimiter", "quote", "types"}) 297 public TextDelimited( Fields fields, String delimiter, String quote, Class[] types ) 298 { 299 this( fields, false, delimiter, quote, types ); 300 } 301 302 /** 303 * Constructor TextDelimited creates a new TextDelimited instance. 304 * 305 * @param fields of type Fields 306 * @param hasHeader of type boolean 307 * @param delimiter of type String 308 * @param quote of type String 309 * @param types of type Class[] 310 */ 311 @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "types"}) 312 public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types ) 313 { 314 this( fields, hasHeader, hasHeader, delimiter, quote, types, true ); 315 } 316 317 /** 318 * Constructor TextDelimited creates a new TextDelimited instance. 319 * 320 * @param fields of type Fields 321 * @param skipHeader of type boolean 322 * @param writeHeader of type boolean 323 * @param delimiter of type String 324 * @param quote of type String 325 * @param types of type Class[] 326 */ 327 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "quote", "types"}) 328 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, String quote, Class[] types ) 329 { 330 this( fields, skipHeader, writeHeader, delimiter, quote, types, true ); 331 } 332 333 /** 334 * Constructor TextDelimited creates a new TextDelimited instance. 335 * 336 * @param fields of type Fields 337 * @param delimiter of type String 338 * @param quote of type String 339 * @param types of type Class[] 340 * @param safe of type boolean 341 */ 342 @ConstructorProperties({"fields", "delimiter", "quote", "types", "safe"}) 343 public TextDelimited( Fields fields, String delimiter, String quote, Class[] types, boolean safe ) 344 { 345 this( fields, false, delimiter, quote, types, safe ); 346 } 347 348 /** 349 * Constructor TextDelimited creates a new TextDelimited instance. 350 * 351 * @param fields of type Fields 352 * @param hasHeader of type boolean 353 * @param delimiter of type String 354 * @param quote of type String 355 * @param types of type Class[] 356 * @param safe of type boolean 357 */ 358 @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "types", "safe"}) 359 public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types, boolean safe ) 360 { 361 this( fields, hasHeader, hasHeader, delimiter, true, quote, types, safe ); 362 } 363 364 /** 365 * Constructor TextDelimited creates a new TextDelimited instance. 366 * 367 * @param fields of type Fields 368 * @param hasHeader of type boolean 369 * @param delimiter of type String 370 * @param quote of type String 371 * @param types of type Class[] 372 * @param safe of type boolean 373 * @param charsetName of type String 374 */ 375 @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "types", "safe", "charsetName"}) 376 public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types, boolean safe, String charsetName ) 377 { 378 this( fields, hasHeader, hasHeader, delimiter, true, quote, types, safe, charsetName ); 379 } 380 381 /** 382 * Constructor TextDelimited creates a new TextDelimited instance. 383 * 384 * @param fields of type Fields 385 * @param skipHeader of type boolean 386 * @param writeHeader of type boolean 387 * @param delimiter of type String 388 * @param quote of type String 389 * @param types of type Class[] 390 * @param safe of type boolean 391 */ 392 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "quote", "types", "safe"}) 393 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, String quote, Class[] types, boolean safe ) 394 { 395 this( fields, skipHeader, writeHeader, delimiter, true, quote, types, safe ); 396 } 397 398 /** 399 * Constructor TextDelimited creates a new TextDelimited instance. 400 * 401 * @param fields of type Fields 402 * @param delimiter of type String 403 * @param quote of type String 404 */ 405 @ConstructorProperties({"fields", "delimiter", "quote"}) 406 public TextDelimited( Fields fields, String delimiter, String quote ) 407 { 408 this( fields, false, delimiter, quote, null, true ); 409 } 410 411 /** 412 * Constructor TextDelimited creates a new TextDelimited instance. 413 * 414 * @param fields of type Fields 415 * @param hasHeader of type boolean 416 * @param delimiter of type String 417 * @param quote of type String 418 */ 419 @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote"}) 420 public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote ) 421 { 422 this( fields, hasHeader, delimiter, quote, null, true ); 423 } 424 425 /** 426 * Constructor TextDelimited creates a new TextDelimited instance. 427 * 428 * @param fields of type Fields 429 * @param hasHeader of type boolean 430 * @param delimiter of type String 431 * @param quote of type String 432 * @param charsetName of type String 433 */ 434 @ConstructorProperties({"fields", "hasHeader", "delimiter", "quote", "charsetName"}) 435 public TextDelimited( Fields fields, boolean hasHeader, String delimiter, String quote, String charsetName ) 436 { 437 this( fields, hasHeader, delimiter, quote, null, true, charsetName ); 438 } 439 440 /** 441 * Constructor TextDelimited creates a new TextDelimited instance. 442 * 443 * @param fields of type Fields 444 * @param skipHeader of type boolean 445 * @param writeHeader of type boolean 446 * @param delimiter of type String 447 * @param strict of type boolean 448 * @param quote of type String 449 * @param types of type Class[] 450 * @param safe of type boolean 451 */ 452 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "strict", "quote", "types", "safe"}) 453 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, boolean strict, String quote, Class[] types, boolean safe ) 454 { 455 this( fields, skipHeader, writeHeader, delimiter, strict, quote, types, safe, DEFAULT_CHARSET ); 456 } 457 458 /** 459 * Constructor TextDelimited creates a new TextDelimited instance. 460 * 461 * @param fields of type Fields 462 * @param skipHeader of type boolean 463 * @param writeHeader of type boolean 464 * @param delimiter of type String 465 * @param strict of type boolean 466 * @param quote of type String 467 * @param types of type Class[] 468 * @param safe of type boolean 469 * @param charsetName of type String 470 */ 471 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimiter", "strict", "quote", "types", "safe", 472 "charsetName"}) 473 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, boolean strict, String quote, Class[] types, boolean safe, String charsetName ) 474 { 475 this( fields, skipHeader, writeHeader, charsetName, new DelimitedParser( delimiter, quote, types, strict, safe ) ); 476 } 477 478 /** 479 * Constructor TextDelimited creates a new TextDelimited instance. 480 * 481 * @param fields of type Fields 482 * @param writeHeader of type boolean 483 * @param delimitedParser of type DelimitedParser 484 */ 485 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "delimitedParser"}) 486 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, DelimitedParser delimitedParser ) 487 { 488 this( fields, skipHeader, writeHeader, null, delimitedParser ); 489 } 490 491 /** 492 * Constructor TextDelimited creates a new TextDelimited instance. 493 * 494 * @param fields of type Fields 495 * @param hasHeader of type boolean 496 * @param delimitedParser of type DelimitedParser 497 */ 498 @ConstructorProperties({"fields", "hasHeader", "delimitedParser"}) 499 public TextDelimited( Fields fields, boolean hasHeader, DelimitedParser delimitedParser ) 500 { 501 this( fields, hasHeader, hasHeader, null, delimitedParser ); 502 } 503 504 /** 505 * Constructor TextDelimited creates a new TextDelimited instance. 506 * 507 * @param fields of type Fields 508 * @param writeHeader of type boolean 509 * @param charsetName of type String 510 * @param delimitedParser of type DelimitedParser 511 */ 512 @ConstructorProperties({"fields", "skipHeader", "writeHeader", "charsetName", "delimitedParser"}) 513 public TextDelimited( Fields fields, boolean skipHeader, boolean writeHeader, String charsetName, DelimitedParser delimitedParser ) 514 { 515 super( fields, fields ); 516 517 this.delimitedParser = delimitedParser; 518 519 // normalizes ALL and UNKNOWN 520 // calls reset on delimitedParser 521 setSourceFields( fields ); 522 setSinkFields( fields ); 523 524 this.skipHeader = skipHeader; 525 this.writeHeader = writeHeader; 526 527 if( charsetName != null ) 528 this.charsetName = charsetName; 529 530 // throws an exception if not found 531 Charset.forName( this.charsetName ); 532 } 533 534 /** 535 * Method getDelimiter returns the delimiter used to parse fields from the current line of text. 536 * 537 * @return a String 538 */ 539 public String getDelimiter() 540 { 541 return delimitedParser.getDelimiter(); 542 } 543 544 /** 545 * Method getQuote returns the quote string, if any, used to encapsulate each field in a line to delimited text. 546 * 547 * @return a String 548 */ 549 public String getQuote() 550 { 551 return delimitedParser.getQuote(); 552 } 553 554 public LineNumberReader createInput( InputStream inputStream ) 555 { 556 try 557 { 558 return new LineNumberReader( new InputStreamReader( inputStream, charsetName ) ); 559 } 560 catch( UnsupportedEncodingException exception ) 561 { 562 throw new TapException( exception ); 563 } 564 } 565 566 public PrintWriter createOutput( OutputStream outputStream ) 567 { 568 try 569 { 570 return new PrintWriter( new OutputStreamWriter( outputStream, charsetName ) ); 571 } 572 catch( UnsupportedEncodingException exception ) 573 { 574 throw new TapException( exception ); 575 } 576 } 577 578 @Override 579 public void setSinkFields( Fields sinkFields ) 580 { 581 super.setSourceFields( sinkFields ); 582 super.setSinkFields( sinkFields ); 583 584 if( delimitedParser != null ) 585 delimitedParser.reset( getSourceFields(), getSinkFields() ); 586 } 587 588 @Override 589 public void setSourceFields( Fields sourceFields ) 590 { 591 super.setSourceFields( sourceFields ); 592 super.setSinkFields( sourceFields ); 593 594 if( delimitedParser != null ) 595 delimitedParser.reset( getSourceFields(), getSinkFields() ); 596 } 597 598 @Override 599 public boolean isSymmetrical() 600 { 601 return super.isSymmetrical() && skipHeader == writeHeader; 602 } 603 604 @Override 605 public Fields retrieveSourceFields( FlowProcess<Properties> process, Tap tap ) 606 { 607 if( !skipHeader || !getSourceFields().isUnknown() ) 608 return getSourceFields(); 609 610 // no need to open them all 611 if( tap instanceof CompositeTap ) 612 tap = (Tap) ( (CompositeTap) tap ).getChildTaps().next(); 613 614 tap = new FileTap( new TextLine( new Fields( "line" ), charsetName ), tap.getIdentifier() ); 615 616 setSourceFields( delimitedParser.parseFirstLine( process, tap ) ); 617 618 return getSourceFields(); 619 } 620 621 @Override 622 public void presentSourceFields( FlowProcess<Properties> process, Tap tap, Fields fields ) 623 { 624 // do nothing 625 } 626 627 @Override 628 public void presentSinkFields( FlowProcess<Properties> flowProcess, Tap tap, Fields fields ) 629 { 630 if( writeHeader ) 631 presentSinkFieldsInternal( fields ); 632 } 633 634 @Override 635 public void sourceConfInit( FlowProcess<Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf ) 636 { 637 } 638 639 @Override 640 public void sourcePrepare( FlowProcess<Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException 641 { 642 sourceCall.setContext( createInput( sourceCall.getInput() ) ); 643 644 sourceCall.getIncomingEntry().setTuple( TupleViews.createObjectArray() ); 645 } 646 647 @Override 648 public boolean source( FlowProcess<Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException 649 { 650 String line = sourceCall.getContext().readLine(); 651 652 if( line == null ) 653 return false; 654 655 if( skipHeader && sourceCall.getContext().getLineNumber() == 1 ) // todo: optimize this away 656 line = sourceCall.getContext().readLine(); 657 658 if( line == null ) 659 return false; 660 661 Object[] split = delimitedParser.parseLine( line ); 662 663 // assumption it is better to re-use than to construct new 664 Tuple tuple = sourceCall.getIncomingEntry().getTuple(); 665 666 TupleViews.reset( tuple, split ); 667 668 return true; 669 } 670 671 @Override 672 public void sourceCleanup( FlowProcess<Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException 673 { 674 sourceCall.setContext( null ); 675 } 676 677 @Override 678 public void sinkConfInit( FlowProcess<Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf ) 679 { 680 } 681 682 @Override 683 public void sinkPrepare( FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) 684 { 685 sinkCall.setContext( createOutput( sinkCall.getOutput() ) ); 686 687 if( writeHeader ) 688 { 689 Fields fields = sinkCall.getOutgoingEntry().getFields(); 690 delimitedParser.joinFirstLine( fields, sinkCall.getContext() ); 691 692 sinkCall.getContext().println(); 693 } 694 } 695 696 @Override 697 public void sink( FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) throws IOException 698 { 699 TupleEntry tupleEntry = sinkCall.getOutgoingEntry(); 700 701 Iterable<String> strings = tupleEntry.asIterableOf( String.class ); 702 703 delimitedParser.joinLine( strings, sinkCall.getContext() ); 704 705 sinkCall.getContext().println(); 706 } 707 708 @Override 709 public void sinkCleanup( FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) 710 { 711 sinkCall.getContext().flush(); 712 sinkCall.setContext( null ); 713 } 714 }