| 1 |
|
#!/usr/local/bin/ruby -w
|
| 2 |
|
|
| 3 |
|
# = faster_csv.rb -- Faster CSV Reading and Writing
|
| 4 |
|
#
|
| 5 |
|
# Created by James Edward Gray II on 2005-10-31.
|
| 6 |
|
# Copyright 2005 Gray Productions. All rights reserved.
|
| 7 |
|
#
|
| 8 |
|
# See FasterCSV for documentation.
|
| 9 |
|
|
| 10 |
|
# Refuse to load on Ruby 1.9 or later, where the standard CSV library
# supersedes FasterCSV.  The comparison is a plain String comparison of
# RUBY_VERSION.  abort() prints the message to $stderr and exits.
if RUBY_VERSION >= "1.9"
  # gsub() strips the leading indentation the heredoc body carries
  abort <<-VERSION_WARNING.gsub(/^\s+/, "")
  Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
  support for Ruby 1.9's m17n encoding engine.
  VERSION_WARNING
end
|
| 16 |
|
|
| 17 |
|
require "forwardable"
|
| 18 |
|
require "English"
|
| 19 |
|
require "enumerator"
|
| 20 |
|
require "date"
|
| 21 |
|
require "stringio"
|
| 22 |
|
|
| 23 |
|
#
|
| 24 |
|
# This class provides a complete interface to CSV files and data. It offers
|
| 25 |
|
# tools to enable you to read and write to and from Strings or IO objects, as
|
| 26 |
|
# needed.
|
| 27 |
|
#
|
| 28 |
|
# == Reading
|
| 29 |
|
#
|
| 30 |
|
# === From a File
|
| 31 |
|
#
|
| 32 |
|
# ==== A Line at a Time
|
| 33 |
|
#
|
| 34 |
|
# FasterCSV.foreach("path/to/file.csv") do |row|
|
| 35 |
|
# # use row here...
|
| 36 |
|
# end
|
| 37 |
|
#
|
| 38 |
|
# ==== All at Once
|
| 39 |
|
#
|
| 40 |
|
# arr_of_arrs = FasterCSV.read("path/to/file.csv")
|
| 41 |
|
#
|
| 42 |
|
# === From a String
|
| 43 |
|
#
|
| 44 |
|
# ==== A Line at a Time
|
| 45 |
|
#
|
| 46 |
|
# FasterCSV.parse("CSV,data,String") do |row|
|
| 47 |
|
# # use row here...
|
| 48 |
|
# end
|
| 49 |
|
#
|
| 50 |
|
# ==== All at Once
|
| 51 |
|
#
|
| 52 |
|
# arr_of_arrs = FasterCSV.parse("CSV,data,String")
|
| 53 |
|
#
|
| 54 |
|
# == Writing
|
| 55 |
|
#
|
| 56 |
|
# === To a File
|
| 57 |
|
#
|
| 58 |
|
# FasterCSV.open("path/to/file.csv", "w") do |csv|
|
| 59 |
|
# csv << ["row", "of", "CSV", "data"]
|
| 60 |
|
# csv << ["another", "row"]
|
| 61 |
|
# # ...
|
| 62 |
|
# end
|
| 63 |
|
#
|
| 64 |
|
# === To a String
|
| 65 |
|
#
|
| 66 |
|
# csv_string = FasterCSV.generate do |csv|
|
| 67 |
|
# csv << ["row", "of", "CSV", "data"]
|
| 68 |
|
# csv << ["another", "row"]
|
| 69 |
|
# # ...
|
| 70 |
|
# end
|
| 71 |
|
#
|
| 72 |
|
# == Convert a Single Line
|
| 73 |
|
#
|
| 74 |
|
# csv_string = ["CSV", "data"].to_csv # to CSV
|
| 75 |
|
# csv_array = "CSV,String".parse_csv # from CSV
|
| 76 |
|
#
|
| 77 |
|
# == Shortcut Interface
|
| 78 |
|
#
|
| 79 |
|
# FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
|
| 80 |
|
# FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
|
| 81 |
|
# FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
|
| 82 |
|
#
|
| 83 |
|
class FasterCSV
|
| 84 |
|
# The version of the installed library.
|
| 85 |
|
# Frozen so the shared version String cannot be modified in place.
VERSION = "1.5.0".freeze
|
| 86 |
|
|
| 87 |
|
#
|
| 88 |
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
| 89 |
|
# fields and allows duplicates just as an Array would, but also allows you to
|
| 90 |
|
# access fields by name just as you could if they were in a Hash.
|
| 91 |
|
#
|
| 92 |
|
# All rows returned by FasterCSV will be constructed from this class, if
|
| 93 |
|
# header row processing is activated.
|
| 94 |
|
#
|
| 95 |
|
class Row
  extend Forwardable
  include Enumerable

  #
  # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
  # expected to be Arrays.  If one Array is shorter than the other, it will be
  # padded with +nil+ objects.
  #
  # The optional +header_row+ parameter can be set to +true+ to indicate, via
  # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
  # a header row.  Otherwise, the row is assumed to be a field row.
  #
  # A FasterCSV::Row object supports the following Array methods through
  # delegation:
  #
  # * empty?()
  # * length()
  # * size()
  #
  def initialize(headers, fields, header_row = false)
    @header_row = header_row

    # zip() pads its argument with nil, so always zip from the longer Array
    # and flip the pairs back to [header, field] order when needed
    @row = if headers.size > fields.size
      headers.zip(fields)
    else
      fields.zip(headers).map { |pair| pair.reverse }
    end
  end

  # Internal data format (an Array of [header, field] pairs) used to compare
  # equality.
  attr_reader :row
  protected   :row

  ### Array Delegation ###

  def_delegators :@row, :empty?, :length, :size

  # Returns +true+ if this is a header row.
  def header_row?
    @header_row
  end

  # Returns +true+ if this is a field row.
  def field_row?
    !header_row?
  end

  # Returns the headers of this row.
  def headers
    @row.map { |pair| pair.first }
  end

  #
  # :call-seq:
  #   field( header )
  #   field( header, offset )
  #   field( index )
  #
  # This method will fetch the field value by +header+ or +index+.  If a field
  # is not found, +nil+ is returned.
  #
  # When provided, +offset+ ensures that a header match occurs on or later
  # than the +offset+ index.  You can use this to find duplicate headers,
  # without resorting to hard-coding exact indices.  An +offset+ beyond the
  # end of the row simply finds nothing.
  #
  def field(header_or_index, minimum_index = 0)
    # slicing past the end of the row yields nil:  nothing to search
    candidates = @row[minimum_index..-1]
    return nil if candidates.nil?

    # locate the pair
    pair = if header_or_index.is_a? Integer
      candidates[header_or_index]
    else
      candidates.assoc(header_or_index)
    end

    # return the field if we have a pair
    pair.nil? ? nil : pair.last
  end
  alias_method :[], :field

  #
  # :call-seq:
  #   []=( header, value )
  #   []=( header, offset, value )
  #   []=( index, value )
  #
  # Looks up the field by the semantics described in FasterCSV::Row.field()
  # and assigns the +value+.
  #
  # Assigning past the end of the row with an index will set all pairs between
  # to <tt>[nil, nil]</tt>.  Assigning to an unused header appends the new
  # pair.
  #
  def []=(*args)
    value = args.pop
    key   = args.first

    if key.is_a? Integer
      if @row[key].nil?  # extending past the end with index
        @row[key] = [nil, value]
        # Array#[]= filled the gap with nil;  turn those into empty pairs
        @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
      else               # normal index assignment
        @row[key][1] = value
      end
    else
      position = index(*args)
      if position.nil?   # appending a field
        self << [key, value]
      else               # normal header assignment
        @row[position][1] = value
      end
    end
  end

  #
  # :call-seq:
  #   <<( field )
  #   <<( header_and_field_array )
  #   <<( header_and_field_hash )
  #
  # If a two-element Array is provided, it is assumed to be a header and field
  # and the pair is appended.  A Hash works the same way with the key being
  # the header and the value being the field.  Anything else is assumed to be
  # a lone field which is appended with a +nil+ header.
  #
  # This method returns the row for chaining.
  #
  def <<(arg)
    if arg.is_a?(Array) && arg.size == 2  # appending a header and field
      @row << arg
    elsif arg.is_a?(Hash)                 # append header and field pairs
      arg.each { |pair| @row << pair }
    else                                  # append field value
      @row << [nil, arg]
    end

    self  # for chaining
  end

  #
  # A shortcut for appending multiple fields.  Equivalent to:
  #
  #   args.each { |arg| faster_csv_row << arg }
  #
  # This method returns the row for chaining.
  #
  def push(*args)
    args.each { |arg| self << arg }

    self  # for chaining
  end

  #
  # :call-seq:
  #   delete( header )
  #   delete( header, offset )
  #   delete( index )
  #
  # Used to remove a pair from the row by +header+ or +index+.  The pair is
  # located as described in FasterCSV::Row.field().  The deleted pair is
  # returned, or +nil+ if a pair could not be found.
  #
  def delete(header_or_index, minimum_index = 0)
    position = if header_or_index.is_a? Integer  # by index
      header_or_index
    else                                         # by header
      index(header_or_index, minimum_index)
    end

    # an unknown header has no position;  Array#delete_at(nil) would raise
    position.nil? ? nil : @row.delete_at(position)
  end

  #
  # The provided +block+ is passed a header and field for each pair in the row
  # and expected to return +true+ or +false+, depending on whether the pair
  # should be deleted.
  #
  # This method returns the row for chaining.
  #
  def delete_if(&block)
    @row.delete_if(&block)

    self  # for chaining
  end

  #
  # This method accepts any number of arguments which can be headers, indices,
  # Ranges of either, or two-element Arrays containing a header and offset.
  # Each argument will be replaced with a field lookup as described in
  # FasterCSV::Row.field().
  #
  # If called with no arguments, all fields are returned.
  #
  def fields(*headers_and_or_indices)
    # return all fields--no arguments
    return @row.map { |pair| pair.last } if headers_and_or_indices.empty?

    # or work like values_at()
    headers_and_or_indices.inject(Array.new) do |found, h_or_i|
      if h_or_i.is_a? Range
        # translate header endpoints into indices before slicing
        range_begin = h_or_i.begin
        range_begin = index(range_begin) unless range_begin.is_a? Integer
        range_end   = h_or_i.end
        range_end   = index(range_end) unless range_end.is_a? Integer
        span        = h_or_i.exclude_end? ? (range_begin...range_end) :
                                            (range_begin..range_end)
        found.concat(fields.values_at(span))
      else
        found << field(*Array(h_or_i))
      end
    end
  end
  alias_method :values_at, :fields

  #
  # :call-seq:
  #   index( header )
  #   index( header, offset )
  #
  # This method will return the index of a field with the provided +header+.
  # The +offset+ can be used to locate duplicate header names, as described in
  # FasterCSV::Row.field().  Returns +nil+ when no such header is found.
  #
  def index(header, minimum_index = 0)
    # slicing past the end of the row yields nil:  nothing to search
    candidates = headers[minimum_index..-1]
    return nil if candidates.nil?

    # find the pair
    position = candidates.index(header)
    # return the index at the right offset, if we found one
    position.nil? ? nil : position + minimum_index
  end

  # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
  def header?(name)
    headers.include? name
  end
  alias_method :include?, :header?

  #
  # Returns +true+ if +data+ matches a field in this row, and +false+
  # otherwise.
  #
  def field?(data)
    fields.include? data
  end

  #
  # Yields each pair of the row as header and field tuples (much like
  # iterating over a Hash).
  #
  # Support for Enumerable.
  #
  # This method returns the row for chaining.
  #
  def each(&block)
    @row.each(&block)

    self  # for chaining
  end

  #
  # Returns +true+ if this row contains the same headers and fields in the
  # same order as +other+.  When +other+ is not a FasterCSV::Row, it is
  # compared directly against the internal Array of header and field pairs.
  #
  def ==(other)
    return @row == other.row if other.is_a? Row

    @row == other
  end

  #
  # Collapses the row into a simple Hash.  Be warned that this discards field
  # order and clobbers duplicate fields (the last duplicate wins).
  #
  def to_hash
    @row.inject(Hash.new) do |hash, pair|
      hash[pair.first] = pair.last
      hash
    end
  end

  #
  # Returns the row as a CSV String.  Headers are not used.  Equivalent to:
  #
  #   faster_csv_row.fields.to_csv( options )
  #
  def to_csv(options = Hash.new)
    fields.to_csv(options)
  end
  alias_method :to_s, :to_csv

  # A summary of fields, by header.
  def inspect
    str = "#<#{self.class}"
    each do |header, value|
      label = header.is_a?(Symbol) ? header.to_s : header.inspect
      str << " #{label}:" << value.inspect
    end
    str << ">"
  end
end
|
| 384 |
|
|
| 385 |
|
#
|
| 386 |
|
# A FasterCSV::Table is a two-dimensional data structure for representing CSV
|
| 387 |
|
# documents. Tables allow you to work with the data by row or column,
|
| 388 |
|
# manipulate the data, and even convert the results back to CSV, if needed.
|
| 389 |
|
#
|
| 390 |
|
# All tables returned by FasterCSV will be constructed from this class, if
|
| 391 |
|
# header row processing is activated.
|
| 392 |
|
#
|
| 393 |
|
class Table
  extend Forwardable
  include Enumerable

  #
  # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
  # to be FasterCSV::Row objects.  All rows are assumed to have the same
  # headers.
  #
  # A FasterCSV::Table object supports the following Array methods through
  # delegation:
  #
  # * empty?()
  # * length()
  # * size()
  #
  def initialize(array_of_rows)
    @table = array_of_rows
    @mode  = :col_or_row
  end

  # The current access mode for indexing and iteration.
  attr_reader :mode

  # Internal data format used to compare equality.
  attr_reader :table
  protected   :table

  ### Array Delegation ###

  def_delegators :@table, :empty?, :length, :size

  #
  # Returns a duplicate table object, in column mode.  This is handy for
  # chaining in a single call without changing the table mode, but be aware
  # that this method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining.  Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_col
    self.class.new(@table.dup).by_col!
  end

  #
  # Switches the mode of this table to column mode.  All calls to indexing and
  # iteration methods will work with columns until the mode is changed again.
  #
  # This method returns the table and is safe to chain.
  #
  def by_col!
    @mode = :col

    self
  end

  #
  # Returns a duplicate table object, in mixed mode.  This is handy for
  # chaining in a single call without changing the table mode, but be aware
  # that this method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining.  Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_col_or_row
    self.class.new(@table.dup).by_col_or_row!
  end

  #
  # Switches the mode of this table to mixed mode.  All calls to indexing and
  # iteration methods will use the default intelligent indexing system until
  # the mode is changed again.  In mixed mode an index is assumed to be a row
  # reference while anything else is assumed to be column access by headers.
  #
  # This method returns the table and is safe to chain.
  #
  def by_col_or_row!
    @mode = :col_or_row

    self
  end

  #
  # Returns a duplicate table object, in row mode.  This is handy for chaining
  # in a single call without changing the table mode, but be aware that this
  # method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining.  Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_row
    self.class.new(@table.dup).by_row!
  end

  #
  # Switches the mode of this table to row mode.  All calls to indexing and
  # iteration methods will work with rows until the mode is changed again.
  #
  # This method returns the table and is safe to chain.
  #
  def by_row!
    @mode = :row

    self
  end

  #
  # Returns the headers for the first row of this table (assumed to match all
  # other rows).  An empty Array is returned for empty tables.
  #
  def headers
    @table.empty? ? Array.new : @table.first.headers
  end

  #
  # In the default mixed mode, this method returns rows for index access and
  # columns for header access.  You can force the index association by first
  # calling by_col!() or by_row!().
  #
  # Columns are returned as an Array of values.  Altering that Array has no
  # effect on the table.
  #
  def [](index_or_header)
    if row_access?(index_or_header)  # by index
      @table[index_or_header]
    else                             # by header
      @table.map { |row| row[index_or_header] }
    end
  end

  #
  # In the default mixed mode, this method assigns rows for index access and
  # columns for header access.  You can force the index association by first
  # calling by_col!() or by_row!().
  #
  # Rows may be set to an Array of values (which will inherit the table's
  # headers()) or a FasterCSV::Row.
  #
  # Columns may be set to a single value, which is copied to each row of the
  # column, or an Array of values.  Arrays of values are assigned to rows top
  # to bottom in row major order.  Excess values are ignored and if the Array
  # does not have a value for each row the extra rows will receive a +nil+.
  #
  # Assigning to an existing column or row clobbers the data.  Assigning to
  # new columns creates them at the right end of the table.
  #
  def []=(index_or_header, value)
    if row_access?(index_or_header)  # by index
      @table[index_or_header] =
        value.is_a?(Array) ? Row.new(headers, value) : value
    else                             # set column
      @table.each_with_index do |row, i|
        row[index_or_header] = if row.header_row?
          index_or_header  # header rows carry the column name itself
        elsif value.is_a? Array
          value[i]         # multiple values
        else
          value            # repeated value
        end
      end
    end
  end

  #
  # The mixed mode default is to treat a list of indices as row access,
  # returning the rows indicated.  Anything else is considered columnar
  # access.  For columnar access, the return set has an Array for each row
  # with the values indicated by the headers in each Array.  You can force
  # column or row mode using by_col!() or by_row!().
  #
  # You cannot mix column and row access.
  #
  def values_at(*indices_or_headers)
    by_rows = @mode == :row ||
              ( @mode == :col_or_row &&
                indices_or_headers.all? { |index| integer_index?(index) } )

    if by_rows  # by indices
      @table.values_at(*indices_or_headers)
    else        # by headers
      @table.map { |row| row.values_at(*indices_or_headers) }
    end
  end

  #
  # Adds a new row to the bottom end of this table.  You can provide an Array,
  # which will be converted to a FasterCSV::Row (inheriting the table's
  # headers()), or a FasterCSV::Row.
  #
  # This method returns the table for chaining.
  #
  def <<(row_or_array)
    @table << if row_or_array.is_a? Array  # append Array
      Row.new(headers, row_or_array)
    else                                   # append Row
      row_or_array
    end

    self  # for chaining
  end

  #
  # A shortcut for appending multiple rows.  Equivalent to:
  #
  #   rows.each { |row| self << row }
  #
  # This method returns the table for chaining.
  #
  def push(*rows)
    rows.each { |row| self << row }

    self  # for chaining
  end

  #
  # Removes and returns the indicated column or row.  In the default mixed
  # mode indices refer to rows and everything else is assumed to be a column
  # header.  Use by_col!() or by_row!() to force the lookup.
  #
  # For column removal the result holds one value per row, with +nil+ for any
  # row whose delete() returned no pair.
  #
  def delete(index_or_header)
    if row_access?(index_or_header)  # by index
      @table.delete_at(index_or_header)
    else                             # by header
      @table.map do |row|
        pair = row.delete(index_or_header)
        pair.nil? ? nil : pair.last
      end
    end
  end

  #
  # Removes any column or row for which the block returns +true+.  In the
  # default mixed mode or row mode, iteration is the standard row major
  # walking of rows.  In column mode, iteration will +yield+ two element
  # tuples containing the column name and an Array of values for that column.
  #
  # This method returns the table for chaining.
  #
  def delete_if(&block)
    if @mode == :row || @mode == :col_or_row  # by index
      @table.delete_if(&block)
    else                                      # by header
      # decide first, delete afterwards, so headers() is stable while asking
      doomed = headers.select { |header| block[[header, self[header]]] }
      doomed.each { |header| delete(header) }
    end

    self  # for chaining
  end

  #
  # In the default mixed mode or row mode, iteration is the standard row major
  # walking of rows.  In column mode, iteration will +yield+ two element
  # tuples containing the column name and an Array of values for that column.
  #
  # This method returns the table for chaining.
  #
  def each(&block)
    if @mode == :col
      headers.each { |header| block[[header, self[header]]] }
    else
      @table.each(&block)
    end

    self  # for chaining
  end

  #
  # Returns +true+ if all rows of this table ==() +other+'s rows.  When
  # +other+ is not a FasterCSV::Table, it is compared directly against the
  # internal Array of rows.
  #
  def ==(other)
    return @table == other.table if other.is_a? Table

    @table == other
  end

  #
  # Returns the table as an Array of Arrays.  Headers will be the first row,
  # then all of the field rows will follow.
  #
  def to_a
    array = [headers]
    @table.each { |row| array << row.fields unless row.header_row? }
    array
  end

  #
  # Returns the table as a complete CSV String.  Headers will be listed first,
  # then all of the field rows.
  #
  def to_csv(options = Hash.new)
    lines = [headers.to_csv(options)]
    @table.each do |row|
      lines << row.fields.to_csv(options) unless row.header_row?
    end
    lines.join
  end
  alias_method :to_s, :to_csv

  # A summary of the table:  its mode and row count (headers included).
  def inspect
    "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
  end

  private

  # Returns +true+ when +index_or_header+ should address a row:  always in
  # row mode, and for Integer arguments in mixed mode.
  def row_access?(index_or_header)
    @mode == :row ||
      (@mode == :col_or_row && index_or_header.is_a?(Integer))
  end

  # Returns +true+ for an Integer or a Range with Integer endpoints.
  def integer_index?(index)
    index.is_a?(Integer) ||
      ( index.is_a?(Range)          &&
        index.first.is_a?(Integer)  &&
        index.last.is_a?(Integer) )
  end
end
|
| 720 |
|
|
| 721 |
|
# The error thrown when the parser encounters illegal CSV formatting.
|
| 722 |
|
# Raised by the parser on illegal CSV formatting;  adds nothing to
# RuntimeError beyond a distinct class to rescue.
class MalformedCSVError < RuntimeError
end
|
| 723 |
|
|
| 724 |
|
#
|
| 725 |
|
# A FieldInfo Struct contains details about a field's position in the data
|
| 726 |
|
# source it was read from. FasterCSV will pass this Struct to some blocks
|
| 727 |
|
# that make decisions based on field structure. See
|
| 728 |
|
# FasterCSV.convert_fields() for an example.
|
| 729 |
|
#
|
| 730 |
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
| 731 |
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
| 732 |
|
# <b><tt>header</tt></b>:: The header for the column, when available.
|
| 733 |
|
#
|
| 734 |
|
FieldInfo = Struct.new(
  :index,   # zero-based index of the field in its row
  :line,    # line of the data source the row is from
  :header   # header for the column, when available
)
|
| 735 |
|
|
| 736 |
|
# A Regexp used to find and convert some common Date formats.
|
| 737 |
|
# Two alternatives, anchored to the whole String:  a worded date such as
# "Mon, Oct 31, 2005" / "Oct 31 2005", or an ISO style "2005-10-31".
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
                    \d{4}-\d{2}-\d{2} )\z /x
|
| 739 |
|
# A Regexp used to find and convert some common DateTime formats.
|
| 740 |
|
# Two alternatives, anchored to the whole String:  a worded timestamp such as
# "Mon Oct 31 12:34:56 2005", or an ISO style "2005-10-31 12:34:56".
DateTimeMatcher =
  / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
          \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
|
| 743 |
|
#
|
| 744 |
|
# This Hash holds the built-in converters of FasterCSV that can be accessed by
|
| 745 |
|
# name. You can select Converters with FasterCSV.convert() or through the
|
| 746 |
|
# +options+ Hash passed to FasterCSV::new().
|
| 747 |
|
#
|
| 748 |
|
# <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
|
| 749 |
|
# <b><tt>:float</tt></b>:: Converts any field Float() accepts.
|
| 750 |
|
# <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
|
| 751 |
|
# and <tt>:float</tt>.
|
| 752 |
|
# <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
|
| 753 |
|
# <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
|
| 754 |
|
# <b><tt>:all</tt></b>:: All built-in converters. A combination of
|
| 755 |
|
# <tt>:date_time</tt> and <tt>:numeric</tt>.
|
| 756 |
|
#
|
| 757 |
|
# This Hash is intentionally left unfrozen and users should feel free to add
|
| 758 |
|
# values to it that can be accessed by all FasterCSV objects.
|
| 759 |
|
#
|
| 760 |
|
# To add a combo field, the value should be an Array of names. Combo fields
|
| 761 |
|
# can be nested with other combo fields.
|
| 762 |
|
#
|
| 763 |
|
# Each lambda returns the converted value, or the field unchanged when the
# conversion raises (or, for dates, when the field does not match the
# corresponding Matcher).  Array values name other entries of this Hash.
Converters = { :integer   => lambda { |f| Integer(f) rescue f },
               :float     => lambda { |f| Float(f) rescue f },
               :numeric   => [:integer, :float],
               :date      => lambda { |f|
                 f =~ DateMatcher ? (Date.parse(f) rescue f) : f
               },
               :date_time => lambda { |f|
                 f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
               },
               :all       => [:date_time, :numeric] }
|
| 773 |
|
|
| 774 |
|
#
|
| 775 |
|
# This Hash holds the built-in header converters of FasterCSV that can be
|
| 776 |
|
# accessed by name. You can select HeaderConverters with
|
| 777 |
|
# FasterCSV.header_convert() or through the +options+ Hash passed to
|
| 778 |
|
# FasterCSV::new().
|
| 779 |
|
#
|
| 780 |
|
# <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
|
| 781 |
|
# <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
|
| 782 |
|
# replaced with underscores, non-word characters
|
| 783 |
|
# are dropped, and finally to_sym() is called.
|
| 784 |
|
#
|
| 785 |
|
# This Hash is intentionally left unfrozen and users should feel free to add
|
| 786 |
|
# values to it that can be accessed by all FasterCSV objects.
|
| 787 |
|
#
|
| 788 |
|
# To add a combo field, the value should be an Array of names. Combo fields
|
| 789 |
|
# can be nested with other combo fields.
|
| 790 |
|
#
|
| 791 |
|
HeaderConverters = {
  :downcase => lambda { |h| h.downcase },
  # downcase, spaces to underscores, drop anything outside a-z, 0-9 and _
  :symbol   => lambda { |h|
    h.downcase.tr(" ", "_").delete("^a-z0-9_").to_sym
  }
}
|
| 797 |
|
|
| 798 |
|
#
|
| 799 |
|
# The options used when no overrides are given by calling code. They are:
|
| 800 |
|
#
|
| 801 |
|
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
|
| 802 |
|
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
|
| 803 |
|
# <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
|
| 804 |
|
# <b><tt>:converters</tt></b>:: +nil+
|
| 805 |
|
# <b><tt>:unconverted_fields</tt></b>:: +nil+
|
| 806 |
|
# <b><tt>:headers</tt></b>:: +false+
|
| 807 |
|
# <b><tt>:return_headers</tt></b>:: +false+
|
| 808 |
|
# <b><tt>:header_converters</tt></b>:: +nil+
|
| 809 |
|
# <b><tt>:skip_blanks</tt></b>:: +false+
|
| 810 |
|
# <b><tt>:force_quotes</tt></b>:: +false+
|
| 811 |
|
#
|
| 812 |
|
# Frozen:  callers are expected to merge overrides into a copy.
DEFAULT_OPTIONS = { :col_sep            => ",",
                    :row_sep            => :auto,
                    :quote_char         => '"',
                    :converters         => nil,
                    :unconverted_fields => nil,
                    :headers            => false,
                    :return_headers     => false,
                    :header_converters  => nil,
                    :skip_blanks        => false,
                    :force_quotes       => false }.freeze
|
| 822 |
|
|
| 823 |
|
#
|
| 824 |
|
# This method will build a drop-in replacement for many of the standard CSV
|
| 825 |
|
# methods. It allows you to write code like:
|
| 826 |
|
#
|
| 827 |
|
# begin
|
| 828 |
|
# require "faster_csv"
|
| 829 |
|
# FasterCSV.build_csv_interface
|
| 830 |
|
# rescue LoadError
|
| 831 |
|
# require "csv"
|
| 832 |
|
# end
|
| 833 |
|
# # ... use CSV here ...
|
| 834 |
|
#
|
| 835 |
|
# This is not a complete interface with completely identical behavior.
|
| 836 |
|
# However, it is intended to be close enough that you won't notice the
|
| 837 |
|
# difference in most cases. CSV methods supported are:
|
| 838 |
|
#
|
| 839 |
|
# * foreach()
|
| 840 |
|
# * generate_line()
|
| 841 |
|
# * open()
|
| 842 |
|
# * parse()
|
| 843 |
|
# * parse_line()
|
| 844 |
|
# * readlines()
|
| 845 |
|
#
|
| 846 |
|
# Be warned that this interface is slower than vanilla FasterCSV due to the
|
| 847 |
|
# extra layer of method calls. Depending on usage, this can slow it down to
|
| 848 |
|
# near CSV speeds.
|
| 849 |
|
#
|
| 850 |
|
def self.build_csv_interface
  # Create an anonymous class and publish it as the top-level CSV constant.
  csv_class = Class.new
  Object.const_set(:CSV, csv_class)

  # Each wrapper below takes the positional separator arguments (+fs+ for the
  # field separator, +rs+ for the row separator) and forwards them to the
  # FasterCSV method of the same name as :col_sep / :row_sep options.
  csv_class.class_eval do
    class << self
      def foreach(path, rs = :auto, &block)  # :nodoc:
        FasterCSV.foreach(path, :row_sep => rs, &block)
      end

      def generate_line(row, fs = ",", rs = "")  # :nodoc:
        FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
      end

      def open(path, mode, fs = ",", rs = :auto, &block)  # :nodoc:
        options = { :col_sep => fs, :row_sep => rs }

        # Without a block, or when not reading, the call is a plain pass-through.
        unless block && mode.include?("r")
          return FasterCSV.open(path, mode, options, &block)
        end

        # Reading with a block: hand each row (not the FasterCSV object) to
        # the caller's block.
        FasterCSV.open(path, mode, options) { |csv| csv.each(&block) }
      end

      def parse(str_or_readable, fs = ",", rs = :auto, &block)  # :nodoc:
        FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
      end

      def parse_line(src, fs = ",", rs = :auto)  # :nodoc:
        FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
      end

      def readlines(path, rs = :auto)  # :nodoc:
        FasterCSV.readlines(path, :row_sep => rs)
      end
    end
  end
end
|
| 883 |
|
|
| 884 |
|
#
|
| 885 |
|
# This method allows you to serialize an Array of Ruby objects to a String or
|
| 886 |
|
# File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
|
| 887 |
|
# useful for spreadsheet and database interaction.
|
| 888 |
|
#
|
| 889 |
|
# Out of the box, this method is intended to work with simple data objects or
|
| 890 |
|
# Structs. It will serialize a list of instance variables and/or
|
| 891 |
|
# Struct.members().
|
| 892 |
|
#
|
| 893 |
|
# If you need more complicated serialization, you can control the process
|
| 894 |
|
# by adding methods to the class to be serialized.
|
| 895 |
|
#
|
| 896 |
|
# A class method csv_meta() is responsible for returning the first row of the
|
| 897 |
|
# document (as an Array). This row is considered to be a Hash of the form
|
| 898 |
|
# key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
|
| 899 |
|
# key with a value of the stringified class name and FasterCSV::dump() will
|
| 900 |
|
# create this, if you do not define this method. This method is only called
|
| 901 |
|
# on the first object of the Array.
|
| 902 |
|
#
|
| 903 |
|
# The next method you can provide is an instance method called csv_headers().
|
| 904 |
|
# This method is expected to return the second line of the document (again as
|
| 905 |
|
# an Array), which is to be used to give each column a header. By default,
|
| 906 |
|
# FasterCSV::load() will set an instance variable if the field header starts
|
| 907 |
|
# with an @ character or call send() passing the header as the method name and
|
| 908 |
|
# the field value as an argument. This method is only called on the first
|
| 909 |
|
# object of the Array.
|
| 910 |
|
#
|
| 911 |
|
# Finally, you can provide an instance method called csv_dump(), which will
|
| 912 |
|
# be passed the headers. This should return an Array of fields that can be
|
| 913 |
|
# serialized for this object. This method is called once for every object in
|
| 914 |
|
# the Array.
|
| 915 |
|
#
|
| 916 |
|
# The +io+ parameter can be used to serialize to a File, and +options+ can be
|
| 917 |
|
# anything FasterCSV::new() accepts.
|
| 918 |
|
#
|
| 919 |
|
def self.dump(ary_of_objs, io = "", options = Hash.new)
  # Writes +ary_of_objs+ as CSV through FasterCSV.new(io, options): one meta
  # row, one header row, then one row per object.
  #
  # Returns csv.string when +io+ is a String; otherwise returns whatever
  # csv.close returns.
  #
  # The optional hooks (csv_meta(), csv_headers() and csv_dump()) are detected
  # with respond_to?() rather than by rescuing NoMethodError.  A NoMethodError
  # raised *inside* a user supplied hook therefore reaches the caller instead
  # of being swallowed and silently replaced by the default serialization.
  # Hooks must be public (or reported by respond_to?()) to be used.
  obj_template = ary_of_objs.first

  csv = FasterCSV.new(io, options)

  # write meta information
  if obj_template.class.respond_to? :csv_meta
    csv << obj_template.class.csv_meta
  else
    csv << [:class, obj_template.class]
  end

  # write headers
  if obj_template.respond_to? :csv_headers
    headers = obj_template.csv_headers
  else
    # default: sorted instance variable names, followed (for Structs) by the
    # sorted member names written as setter names ("member=")
    headers = obj_template.instance_variables.sort
    if obj_template.is_a? Struct
      headers += obj_template.members.map { |mem| "#{mem}=" }.sort
    end
  end
  csv << headers

  # serialize each object
  ary_of_objs.each do |obj|
    if obj.respond_to? :csv_dump
      csv << obj.csv_dump(headers)
    else
      row = headers.map do |var|
        if var[0] == ?@
          obj.instance_variable_get(var)
        else
          # strip the trailing "=" to get the Struct member name
          obj[var[0..-2]]
        end
      end
      csv << row
    end
  end

  if io.is_a? String
    csv.string
  else
    csv.close
  end
end
|
| 963 |
|
|
| 964 |
|
#
|
| 965 |
|
# :call-seq:
|
| 966 |
|
# filter( options = Hash.new ) { |row| ... }
|
| 967 |
|
# filter( input, options = Hash.new ) { |row| ... }
|
| 968 |
|
# filter( input, output, options = Hash.new ) { |row| ... }
|
| 969 |
|
#
|
| 970 |
|
# This method is a convenience for building Unix-like filters for CSV data.
|
| 971 |
|
# Each row is yielded to the provided block which can alter it as needed.
|
| 972 |
|
# After the block returns, the row is appended to +output+ altered or not.
|
| 973 |
|
#
|
| 974 |
|
# The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
|
| 975 |
|
# (generally String or IO objects). If not given, they default to
|
| 976 |
|
# <tt>ARGF</tt> and <tt>$stdout</tt>.
|
| 977 |
|
#
|
| 978 |
|
# The +options+ parameter is also filtered down to FasterCSV::new() after some
|
| 979 |
|
# clever key parsing. Any key beginning with <tt>:in_</tt> or
|
| 980 |
|
# <tt>:input_</tt> will have that leading identifier stripped and will only
|
| 981 |
|
# be used in the +options+ Hash for the +input+ object. Keys starting with
|
| 982 |
|
# <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
|
| 983 |
|
# are assigned to both objects.
|
| 984 |
|
#
|
| 985 |
|
# The <tt>:output_row_sep</tt> +option+ defaults to
|
| 986 |
|
# <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
|
| 987 |
|
#
|
| 988 |
|
def self.filter(*args)
|
| 989 |
|
# parse options for input, output, or both
|
| 990 |
|
in_options, out_options = Hash.new, {:row_sep => $INPUT_RECORD_SEPARATOR}
|
| 991 |
|
if args.last.is_a? Hash
|
| 992 |
|
args.pop.each do |key, value|
|
| 993 |
|
case key.to_s
|
| 994 |
|
when /\Ain(?:put)?_(.+)\Z/
|
| 995 |
|
in_options[$1.to_sym] = value
|
| 996 |
|
when /\Aout(?:put)?_(.+)\Z/
|
| 997 |
|
out_options[$1.to_sym] = value
|
| 998 |
|
else
|
| 999 |
|
in_options[key] = value
|