Module: Export::CSV

Defined in:
lib/export/csv.rb

Overview

Code that translates scopes into downloadable tab-delimited CSV. Dependent on PostgreSQL.

Defined Under Namespace

Modules: Globi, TaxonNameOrigin

Class Method Summary collapse

Class Method Details

.copy_table(query) ⇒ Object

Returns Tempfile.

Parameters:

  • query

    any ActiveRecord::Relation

Returns:

  • Tempfile



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/export/csv.rb', line 112

# Streams the result of +query+ into a Tempfile using the COPY ... TO STDOUT
# statement built below (CSV format, tab delimiter, header row, UTF8 encoding).
#
# @param query [#to_sql] anything that responds to +to_sql+ (documented as any ActiveRecord::Relation)
# @return [Tempfile] rewound to the beginning, ready to be read
def self.copy_table(query)
  connection = ::Export.get_connection

  tempfile = Tempfile.new
  copy_statement = "COPY ( #{query.to_sql} ) TO STDOUT WITH (FORMAT CSV, DELIMITER E'\t', HEADER, ENCODING 'UTF8')"

  connection.copy_data(copy_statement) do
    # get_copy_data returns a falsy value once there is nothing more to read.
    while (chunk = connection.get_copy_data)
      tempfile.write(chunk.force_encoding('UTF-8'))
    end
  end

  tempfile.tap(&:rewind)
end

.delete_columns(tbl, columns = []) ⇒ Array

Returns the table with the specified columns deleted.

Parameters:

  • tbl (Array)
  • columns (Array) (defaults to: [])

Returns:

  • (Array)

    the table with the specified columns deleted



69
70
71
72
73
74
75
76
77
# File 'lib/export/csv.rb', line 69

# Removes the named columns from a column-major table, mutating it in place.
#
# Names that match no header are ignored. When several columns share a
# header, only the first one is removed for each occurrence of the name.
#
# @param tbl [Array<Array>] one inner Array per column; its first element is the header
# @param columns [Array<#to_s>] header names of the columns to remove
# @return [Array] the same +tbl+ object with the matching columns removed
def self.delete_columns(tbl, columns = [])
  columns.each do |col|
    name = col.to_s
    # Compare the String form of both sides so that Symbol headers match too.
    # `&.` keeps a nil header from matching an empty name.
    index = tbl.index { |column| column.first&.to_s == name }
    tbl.delete_at(index) if index
  end
  tbl
end

.generate_csv(scope, exclude_columns: [], header_converters: [], trim_rows: false, trim_columns: false, column_order: []) ⇒ CSV

Translates a scope into a tab-delimited CSV string, with optional tweaks to the data.

This is a very nice reference for future consideration:

http://collectiveidea.com/blog/archives/2015/03/05/optimizing-rails-for-memory-usage-part-3-pluck-and-database-laziness

Parameters:

  • scope (Scope)
  • exclude_columns (Array) (defaults to: [])

    strings

  • header_converters (Array) (defaults to: [])
  • trim_rows (Boolean) (defaults to: false)
  • trim_columns (Boolean) (defaults to: false)

Returns:



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/export/csv.rb', line 16

# Translates a scope into tab-delimited CSV text, with optional tweaks to the data.
#
# The data is held column-major while it is filtered (one Array per column,
# header first) and is transposed back into rows only when written out.
#
# @param scope [#columns_hash, #pluck_rows] source of the column names and the row values
# @param exclude_columns [Array<#to_s>] header names of columns to drop from the output
# @param header_converters [Array] passed to ::CSV as its +header_converters+ option
# @param trim_rows [Boolean] when true, skip rows in which no cell is #present?
# @param trim_columns [Boolean] when true, drop columns via .trim_columns
# @param column_order [Array<#to_s>] preferred column order, applied via .sort_column_headers
# @return [String] the generated text; empty when no columns remain after filtering
def self.generate_csv(scope, exclude_columns: [], header_converters: [], trim_rows: false, trim_columns: false, column_order: [])
  column_names = scope.columns_hash.keys
  column_names = sort_column_headers(column_names, column_order.map(&:to_s)) if column_order.any?

  # Parse the column names as a CSV header line so that header_converters are applied to them.
  header_parser = ::CSV.new(column_names.join(','), header_converters:, headers: true)
  header_parser.read

  headers = ::CSV::Row.new(header_parser.headers, header_parser.headers, true).headers

  tbl = headers.map { |header| [header] }

  # Pluck rows is from postgresql_cursor gem
  scope.pluck_rows(*column_names).each do |values|
    values.each_with_index do |value, index|
      tbl[index] << Utilities::Strings.sanitize_for_csv(value)
    end
  end

  delete_columns(tbl, exclude_columns) unless exclude_columns.empty?

  # The keyword argument shares its name with the helper; the call with
  # parentheses and an argument still dispatches to the method.
  trim_columns(tbl) if trim_columns

  # CSV::Converters are only available on read, not generate, so we can't use them here.
  output = StringIO.new

  # Every column may have been excluded or trimmed away; there is nothing to write then.
  return output.string if tbl.empty?

  tbl.first.each_index do |row_index|
    row = tbl.map { |column| column[row_index] }
    next if trim_rows && !row.any? { |cell| cell.present? }

    output.puts ::CSV.generate_line(row, col_sep: "\t", encoding: Encoding::UTF_8)
  end

  output.string
end

.sort_column_headers(column_names = [], column_order = []) ⇒ Object

Sort order for columns

Columns not in column_order are added at the *front* of the file.


96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/export/csv.rb', line 96

# Reorders column names according to a preferred order.
#
# Names that do not appear in +column_order+ come first, in their original
# order; they are followed by the names that do appear in it, arranged as in
# +column_order+. Entries of +column_order+ that are not present in
# +column_names+ are ignored rather than leaving nil placeholders in the result.
#
# @param column_names [Array<String>] the names to arrange
# @param column_order [Array<String>] the preferred order
# @return [Array<String>] the rearranged names
def self.sort_column_headers(column_names = [], column_order = [])
  unordered = column_names - column_order
  ordered = column_order & column_names

  unordered + ordered
end

.trim_columns(table) ⇒ Array

Parameters:

  • table (Array)

    Removes columns without any non-#blank? values. Doing this in the scope is better, of course: this is very slow, so use a proper scope and the exclude_columns: [] option instead.

Returns:

  • (Array)


83
84
85
86
87
88
89
90
91
92
# File 'lib/export/csv.rb', line 83

# Drops, in place, every column whose data cells are all non-#present?.
#
# Each column is an Array whose first element is the header; the header does
# not count as data. A column that has only a header and no data cells is kept
# as long as that header is truthy; a completely empty column is dropped.
#
# @param table [Array<Array>] column-major table
# @return [Array] the same +table+ object
def self.trim_columns(table)
  table.reject! do |column|
    header, *cells = column

    if cells.empty?
      # Nothing to inspect: the header alone decides whether the column stays.
      !header
    else
      cells.none? { |cell| cell.present? }
    end
  end

  table
end