Class IlluminaBasecallsToFastq

    • Field Detail

      • BASECALLS_DIR

        @Argument(doc="The basecalls directory. ",
                  shortName="B")
        public File BASECALLS_DIR
      • BARCODES_DIR

        @Argument(doc="The barcodes directory with _barcode.txt files (generated by ExtractIlluminaBarcodes). If not set, use BASECALLS_DIR. ",
                  shortName="BCD",
                  optional=true)
        public File BARCODES_DIR
      • LANE

        @Argument(doc="Lane number. ",
                  shortName="L")
        public Integer LANE
      • OUTPUT_PREFIX

        @Argument(doc="The prefix for output FASTQs.  Extensions as described above are appended.  Use this option for a non-barcoded run, or for a barcoded run in which it is not desired to demultiplex reads into separate files by barcode.",
                  shortName="O",
                  mutex="MULTIPLEX_PARAMS")
        public File OUTPUT_PREFIX
      • RUN_BARCODE

        @Argument(doc="The barcode of the run.  Prefixed to read names.")
        public String RUN_BARCODE
      • MACHINE_NAME

        @Argument(doc="The name of the machine on which the run was sequenced; required if emitting Casava1.8-style read name headers",
                  optional=true)
        public String MACHINE_NAME
      • FLOWCELL_BARCODE

        @Argument(doc="The barcode of the flowcell that was sequenced; required if emitting Casava1.8-style read name headers",
                  optional=true)
        public String FLOWCELL_BARCODE
      • READ_STRUCTURE

        @Argument(doc="A description of the logical structure of clusters in an Illumina Run, i.e. a description of the structure IlluminaBasecallsToSam assumes the data to be in. It should consist of integer/character pairs describing the number of cycles and the type of those cycles (B for Sample Barcode, M for molecular barcode, T for Template, and S for skip).  E.g. If the input data consists of 80 base clusters and we provide a read structure of \"28T8M8B8S28T\" then the sequence may be split up into four reads:\n* read one with 28 cycles (bases) of template\n* read two with 8 cycles (bases) of molecular barcode (ex. unique molecular barcode)\n* read three with 8 cycles (bases) of sample barcode\n* 8 cycles (bases) skipped.\n* read four with 28 cycles (bases) of template\nThe skipped cycles would NOT be included in an output SAM/BAM file or in read groups therein.",
                  shortName="RS")
        public String READ_STRUCTURE
      • MULTIPLEX_PARAMS

        @Argument(doc="Tab-separated file for creating all output FASTQs demultiplexed by barcode for a lane with a single IlluminaBasecallsToFastq invocation.  The columns are OUTPUT_PREFIX, and BARCODE_1, BARCODE_2 ... BARCODE_X where X = number of barcodes per cluster (optional).  A row with BARCODE_1 set to \'N\' is used to specify an output_prefix for no barcode match.",
                  mutex="OUTPUT_PREFIX")
        public File MULTIPLEX_PARAMS
      • NUM_PROCESSORS

        @Argument(doc="The number of threads to run in parallel. If NUM_PROCESSORS = 0, the number of threads is automatically set to the number of cores available on the machine. If NUM_PROCESSORS < 0, then the number of cores used will be the number available on the machine less the absolute value of NUM_PROCESSORS.")
        public Integer NUM_PROCESSORS
      • FIRST_TILE

        @Argument(doc="If set, this is the first tile to be processed (used for debugging).  Note that tiles are not processed in numerical order.",
                  optional=true)
        public Integer FIRST_TILE
      • TILE_LIMIT

        @Argument(doc="If set, process no more than this many tiles (used for debugging).",
                  optional=true)
        public Integer TILE_LIMIT
      • APPLY_EAMSS_FILTER

        @Argument(doc="Apply EAMSS filtering to identify inappropriately quality scored bases towards the ends of reads and convert their quality scores to Q2.")
        public boolean APPLY_EAMSS_FILTER
      • FORCE_GC

        @Argument(doc="If true, call System.gc() periodically.  This is useful in cases in which the -Xmx value passed is larger than the available memory.")
        public Boolean FORCE_GC
      • MAX_READS_IN_RAM_PER_TILE

        @Argument(doc="Configure SortingCollections to store this many records before spilling to disk. For an indexed run, each SortingCollection gets this value/number of indices.")
        public int MAX_READS_IN_RAM_PER_TILE
      • MINIMUM_QUALITY

        @Argument(doc="The minimum quality (after transforming 0s to 1s) expected from reads.  If qualities are lower than this value, an error is thrown. The default of 2 is what Illumina\'s spec describes as the minimum, but in practice the value has been observed lower.")
        public int MINIMUM_QUALITY
      • INCLUDE_NON_PF_READS

        @Argument(doc="Whether to include non-PF reads",
                  shortName="NONPF",
                  optional=true)
        public boolean INCLUDE_NON_PF_READS
      • IGNORE_UNEXPECTED_BARCODES

        @Argument(doc="Whether to ignore reads whose barcodes are not found in MULTIPLEX_PARAMS.  Useful when outputting FASTQs for only a subset of the barcodes in a lane.",
                  shortName="INGORE_UNEXPECTED")
        public boolean IGNORE_UNEXPECTED_BARCODES
      • READ_NAME_FORMAT

        @Argument(doc="The read name header formatting to emit.  Casava1.8 formatting has additional information beyond Illumina, including: the passing-filter flag value for the read, the flowcell name, and the sequencer name.")
        public IlluminaBasecallsToFastq.ReadNameFormat READ_NAME_FORMAT
      • COMPRESS_OUTPUTS

        @Argument(shortName="GZIP",
                  doc="Compress output FASTQ files using gzip and append a .gz extension to the file names.")
        public boolean COMPRESS_OUTPUTS
    • Constructor Detail

      • IlluminaBasecallsToFastq

        public IlluminaBasecallsToFastq()
    • Method Detail

      • doWork

        protected int doWork()
        Description copied from class: CommandLineProgram
        Do the work after the command line has been parsed. RuntimeExceptions may be thrown by this method, and are reported appropriately.
        Specified by:
        doWork in class CommandLineProgram
        Returns:
        program exit status.
      • customCommandLineValidation

        protected String[] customCommandLineValidation()
        Description copied from class: CommandLineProgram
        Put any custom command-line validation in an override of this method. clp is initialized at this point and can be used to print usage and access argv. Any options set by command-line parser can be validated.
        Overrides:
        customCommandLineValidation in class CommandLineProgram
        Returns:
        null if the command line is valid. If the command line is invalid, returns an array of error messages to be written to the appropriate place.