如何用 Ruby 从 Excel 中抽取 email?
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。
可用如下代码解决(需在安装了 Excel 的 Windows 上运行,因为代码通过 win32ole 调用 Excel):
# Legacy Ruby 1.8 switch that selected UTF-8 string handling. Ruby 1.9+ no
# longer acts on it.
# NOTE(review): probably safe to delete, since the String#match? call further
# down already requires Ruby 2.4+ — confirm no 1.8 interpreter is targeted.
$KCODE = 'u'
require 'find'
require 'win32ole'
require 'pathname'
# Extracts e-mail addresses from legacy Excel workbooks (.xls) by driving Excel
# through COM automation (WIN32OLE).
#
# For every worksheet that contains at least one matching cell, the matching
# cell values are written, one per line, to "<workbook>_<sheet number>.txt" in
# the workbook's own directory.
class AutoFixMailPros
  # Matches an e-mail address anywhere inside a cell's text. Every separator
  # group is optional ("*"), so a plain "user@host.tld" matches as well as
  # multi-label domains such as "user@mail.host.co.uk".
  EMAIL_PATTERN = /\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/

  # Starts an invisible Excel instance with alert dialogs switched off.
  def initialize
    @excel = WIN32OLE.new('Excel.Application')
    @excel.visible = false
    @excel.Application.DisplayAlerts = false
  end

  # Processes every .xls workbook found under +directory+ and then shuts the
  # Excel instance down.
  #
  # Excel is quit even when no workbook is found or when processing raises, so
  # no hidden Excel process is left behind. After this call the instance holds
  # no Excel handle any more.
  #
  # @param directory [String] root directory to scan recursively
  # @return [nil]
  # @raise [Errno::ENOENT] if +directory+ does not exist (from Find.find)
  def run_excel(directory = 'D:/excelrun/temp')
    @excel_files = find_excel_files(directory)
    @excel_files.each do |file_path|
      process_excel_file(file_path)
      rename_processed_file(file_path)
    end
    nil
  ensure
    shutdown_excel
  end

  private

  # Quits Excel (if it is still running) and drops the reference to it.
  def shutdown_excel
    return unless @excel

    @excel.Quit
    @excel = nil
    GC.start
  end

  # Recursively collects workbook paths under +dir+.
  #
  # Entries whose base name starts with "@$$" are skipped, as are directories
  # that merely happen to be named "*.xls".
  #
  # @param dir [String] directory to walk
  # @return [Array<String>] paths with a ".xls" extension (case-insensitive)
  def find_excel_files(dir)
    files = []
    Find.find(dir) do |path|
      next if File.basename(path).start_with?('@$$')
      next unless File.file?(path)

      files << path if File.extname(path).casecmp('.xls').zero?
    end
    files
  end

  # Opens one workbook, extracts the e-mails of each worksheet and closes the
  # workbook again, also when extraction fails part-way.
  #
  # @param file_path [String] path of the workbook to open
  def process_excel_file(file_path)
    workbook = @excel.Workbooks.Open(file_path)
    begin
      # Count via Worksheets (not Sheets) because sheets are fetched through
      # Worksheets(n) below; Sheets would also count chart sheets.
      sheet_count = workbook.Worksheets.Count
      (1..sheet_count).each do |sheet_num|
        extract_emails_to_file(workbook, sheet_num, file_path)
      end
    ensure
      # NOTE(review): true (SaveChanges) is kept from the original although
      # nothing here edits the workbook — confirm that saving is intended.
      workbook.Close(true)
    end
  end

  # Extracts the e-mails of one worksheet and writes them to a text file when
  # at least one was found.
  #
  # Cells are addressed relative to the sheet's UsedRange, so data is found
  # even when the used range does not start at cell A1.
  #
  # @param workbook [Object] open workbook (responds to Worksheets(n))
  # @param sheet_num [Integer] 1-based worksheet index
  # @param base_path [String] path of the workbook; determines the output name
  def extract_emails_to_file(workbook, sheet_num, base_path)
    used_range = workbook.Worksheets(sheet_num).UsedRange
    emails = find_emails_in_sheet(used_range, used_range.Rows.Count, used_range.Columns.Count)
    save_emails_to_txt(base_path, emails, sheet_num) if emails.any?
  end

  # Collects the text of every cell that contains an e-mail address.
  #
  # @param worksheet [Object] worksheet or range; anything answering
  #   Cells(row, col) with an object that answers Value
  # @param rows [Integer] number of rows to scan, starting at 1
  # @param cols [Integer] number of columns to scan, starting at 1
  # @return [Array<String>] full cell texts (not just the matched address), in
  #   row-major order
  def find_emails_in_sheet(worksheet, rows, cols)
    (1..rows).each_with_object([]) do |row, emails|
      (1..cols).each do |col|
        # to_s turns empty cells (nil) and numeric cells into strings.
        value = worksheet.Cells(row, col).Value.to_s
        emails << value if value.match?(EMAIL_PATTERN)
      end
    end
  end

  # Writes +emails+, one per line, to "<workbook base name>_<sheet_num>.txt" in
  # the workbook's directory, overwriting an existing file.
  #
  # @param base_path [String] path of the source workbook
  # @param emails [Array<String>] values to write
  # @param sheet_num [Integer] worksheet index used in the file name
  def save_emails_to_txt(base_path, emails, sheet_num)
    file_name = "#{File.basename(base_path, '.*')}_#{sheet_num}.txt"
    file_path = Pathname.new(File.dirname(base_path)).realpath + file_name
    File.open(file_path, 'w') { |file| emails.each { |email| file.puts(email) } }
  end

  # Hook called after a workbook has been processed. Currently a no-op.
  #
  # @param original_path [String] path of the processed workbook
  def rename_processed_file(original_path)
    # Implement renaming logic if needed
    # new_name = "#{Pathname.new(original_path).dirname}(#{File.basename(original_path, '.*')}@$$$.xls)"
    # File.rename(original_path, new_name)
  end
end
# Script entry point: build an extractor and scan the default directory.
AutoFixMailPros.new.run_excel