#!/usr/bin/env ruby
# Ag -- archiving all the 'golden' flamewars on -dev
# Alex Legler

$VERBOSE = nil

#require 'bundler/setup'
require 'mail'
require 'maildir'
require 'elasticsearch'
require 'optparse'
require 'ostruct'
require 'parallel'
require 'ruby-progressbar'
require_relative 'lib/utils'
require_relative 'lib/threading'
require_relative 'lib/rendering'
require_relative 'lib/storage'
require_relative 'lib/hotfixes'
require_relative 'lib/monkeypatch_es'

# Global run configuration, filled in by the option parser below.
$options = OpenStruct.new
$options.action = nil           # Symbol naming the do_* method to run
$options.name = nil             # mailing list name (--list)
$options.index_only = false
$options.no_threading = false
$options.debug = false
$options.readonly = false
$options.jobs = false           # false until --jobs is given
$options.progress = true
$options.need_argument = true   # whether a positional argument (ARGV[0]) is required
$options.argmode = nil          # how the positional argument is interpreted (:dir, :file, :msgid, :hash)
$options.comment = nil

# Records the chosen action. Exactly one action option may be given per run;
# a second one aborts the program.
select_action = lambda do |action|
  abort 'Can only select one action' unless $options.action.nil?
  $options.action = action
end

op = OptionParser.new do |opts|
  actions = %w(hide-msg unhide-msg index-full index-new delete-msg delete-index reindex rethread info).map { |s| '--' + s }.join('|')
  opts.banner = "Usage: ag <<#{actions}>> <--list listname>> <[--file|--msgid|--hash] > [options]"

  # --- Actions that work on a Maildir -------------------------------------

  opts.on('--index-full', 'Read the full past archive from Maildir/cur. Does --delete-index by default. Needs --list and a Maildir') do
    select_action.call(:do_full)
    $options.argmode = :dir
  end

  opts.on('--index-new', 'Read new messages from Maildir/new and move them to Maildir/cur. Needs --list and a Maildir') do
    select_action.call(:do_incremental)
    $options.argmode = :dir
  end

  # --- Actions that work on a single message ------------------------------

  opts.on('--delete-msg', 'Delete message. Needs --list and one of --file, --msgid, or --hash') do
    select_action.call(:do_delete_msg)
  end

  opts.on('--hide-msg', 'Hides a message. Needs --list and one of --file, --msgid, or --hash') do
    select_action.call(:do_hide_msg)
  end

  opts.on('--unhide-msg', 'Unhides a message. Needs --list and one of --file, --msgid, or --hash') do
    select_action.call(:do_unhide_msg)
  end

  # --- Actions that only need the list name (no positional argument) ------

  opts.on('--create-index', 'Create index but do not populate. Needs --list') do
    select_action.call(:do_create_index)
    $options.need_argument = false
  end

  opts.on('--flush-index', 'Flush index to disk. Needs --list') do
    select_action.call(:do_flush_index)
    $options.need_argument = false
  end

  opts.on('--rethread', 'Rethread messages. Needs --list') do
    select_action.call(:do_rethread)
    $options.need_argument = false
  end

  opts.on('--delete-index', 'Delete index. Needs --list') do
    select_action.call(:do_delete_index)
    $options.need_argument = false
  end

  opts.on('--info', 'Display message details. Needs --list and one of --file, --msgid, or --hash') do
    select_action.call(:do_info)
  end

  opts.on('--reindex', 'Reindex message. Needs --list and --file') do
    select_action.call(:do_reindex)
  end

  # --- Modifiers -----------------------------------------------------------

  opts.on('--list NAME', 'Name of the mailing list to work with') do |name|
    # \A and \z anchor the whole string. With ^ and $ a value containing an
    # embedded newline would be accepted as soon as one of its lines matched.
    if name =~ /\A[0-9a-zA-Z-]+\z/
      $options.name = name
    else
      abort 'List name can only consist of letters, numbers and hyphens.'
    end
  end

  opts.on('--file', 'The argument is a file') do
    $options.argmode = :file
  end

  opts.on('--msgid', 'The argument is a Message-Id') do
    $options.argmode = :msgid
  end

  opts.on('--hash', 'The argument is a X-Archives-Hash') do
    $options.argmode = :hash
  end

  opts.on('--index-only', 'Only delete the message from the index, not from disk') do
    $options.index_only = true
  end

  opts.on('--no-threading', 'Only index, don\'t update threading') do
    $options.no_threading = true
  end

  opts.on('--debug', 'Print debug messages') do
    $options.debug = true
  end

  opts.on('--readonly', 'Do not alter the maildir in any way') do
    $options.readonly = true
  end

  opts.on('--jobs JOBS', 'Number of parallel jobs to run (defaults to 75% of core count)') do |jobs|
    $options.jobs = jobs.to_i
  end

  opts.on('--progress', 'Display the progress bar') do
    $options.progress = true
  end

  opts.on('--no-progress', 'Do not display the progress bar') do
    $options.progress = false
  end

  opts.on('--comment COMMENT', 'Comment string as why the message is being hidden/unhidden.') do |comment|
    $options.comment = comment
  end
end

# parse! removes the recognised options from ARGV, leaving positional arguments.
op.parse!

abort op.help unless $options.action
abort 'List name required' unless $options.name

if $options.need_argument
  abort 'Need a Maildir/File/Hash/Message-Id to work with' if ARGV.empty?

  # The first positional argument is stored in `dir` regardless of argmode.
  $options.dir = ARGV[0]
end

if $options.argmode == :dir
  # Open maildir and set serializer
  $maildir = Maildir.new(File.join($options.dir), false)
  $maildir.serializer = Maildir::Serializer::Mail.new
end

# Connect to Elasticsearch
$es = Elasticsearch::Client.new(log: false)
$es.transport.reload_connections!
Ag::Utils.proc_count = $options.jobs

###############################################################################

# Full import: optionally drops the index, (re)creates it, stores every
# message found in Maildir/cur and finally rethreads.
def do_full
  abort "Wrong argument type: #{$options.argmode}" if $options.argmode != :dir

  # Errors from index deletion/creation are re-raised (_raise: true); only
  # the "missing" / "already exists" cases are tolerated.
  do_delete_index(ignore_missing: true, _raise: true) unless $options.readonly
  do_create_index(ignore_exists: true, _raise: true)

  queue = $maildir.list(:cur)

  parallel_opts = { in_processes: Ag::Utils.proc_count }
  if $options.progress
    parallel_opts[:progress] = "Importing #{$options.name}"
  end

  Parallel.each(queue, parallel_opts) do |entry|
    parsed = entry.data

    begin
      Ag::Storage.store($options.name, parsed, entry.unique_name)
    rescue => err
      # Failures to store are skipped; they are only reported with --debug.
      if $options.debug
        $stderr.puts "Cannot save message #{parsed.message_id}: (#{err.class}) #{err.message}"
      end
    end
  end

  do_rethread
end

# Incremental import: stores every message found in Maildir/new, calls
# `process` on each stored message unless --readonly is set, then rethreads.
def do_incremental
  abort "Wrong argument type: #{$options.argmode}" if $options.argmode != :dir

  queue = $maildir.list(:new)

  do_create_index(ignore_exists: true, _raise: true)

  parallel_opts = { in_processes: Ag::Utils.proc_count }
  if $options.progress
    parallel_opts[:progress] = "Importing #{$options.name}"
  end

  Parallel.each(queue, parallel_opts) do |entry|
    parsed = entry.data

    begin
      Ag::Storage.store($options.name, parsed, entry.unique_name)
      entry.process unless $options.readonly
    rescue => err
      # Failures are skipped; they are only reported with --debug.
      if $options.debug
        $stderr.puts "Cannot save message #{parsed.message_id} (file #{entry.filename}): #{err.message}"
      end
    end
  end

  do_rethread
end

# Recalculates threading for the list, unless --no-threading was given.
def do_rethread
  return if $options.no_threading

  Ag::Threading.calc($options.name)
end

# Deletes the message identified by Ag::Utils.resolve_id; storage errors are
# reported on stderr instead of being raised.
def do_delete_msg
  target = Ag::Utils.resolve_id

  begin
    Ag::Storage.delete($options.name, target)
  rescue => err
    $stderr.puts "Cannot delete message: #{err}"
  end
end

# Hides the message identified by Ag::Utils.resolve_id, passing along the
# --comment text; storage errors are reported on stderr.
def do_hide_msg
  target = Ag::Utils.resolve_id

  begin
    Ag::Storage.hide($options.name, target, $options.comment)
  rescue => err
    $stderr.puts "Cannot hide message: #{err}"
  end
end

# Unhides the message identified by Ag::Utils.resolve_id, passing along the
# --comment text; storage errors are reported on stderr.
def do_unhide_msg
  target = Ag::Utils.resolve_id

  begin
    Ag::Storage.unhide($options.name, target, $options.comment)
  rescue => err
    $stderr.puts "Cannot unhide message: #{err}"
  end
end

# Deletes the list's index.
#
# ignore_missing: when true, a NotFound error is silently ignored.
# _raise:         when true, errors are re-raised instead of printed.
def do_delete_index(ignore_missing: false, _raise: false)
  Ag::Storage.delete_index($options.name)
rescue Elasticsearch::Transport::Transport::Errors::NotFound => missing
  if ignore_missing
    nil
  elsif _raise
    raise missing
  else
    $stderr.puts "Index does not exist: #{missing}"
  end
rescue => err
  raise err if _raise

  $stderr.puts "Cannot delete index: #{err}"
end

# Creates the list's index.
#
# ignore_exists: when true, a BadRequest whose message mentions
#                IndexAlreadyExistsException is silently ignored.
# _raise:        when true, other BadRequest errors are re-raised instead of
#                printed.
def do_create_index(ignore_exists: false, _raise: false)
  Ag::Storage.create_index($options.name)
rescue Elasticsearch::Transport::Transport::Errors::BadRequest => bad_request
  if !ignore_exists || bad_request.message !~ /IndexAlreadyExistsException/
    raise bad_request if _raise

    $stderr.puts "Cannot create index #{bad_request}"
  end
end

# Flushes the list's index. Takes the same keyword arguments and applies the
# same BadRequest handling as do_create_index.
def do_flush_index(ignore_exists: false, _raise: false)
  Ag::Storage.flush_index($options.name)
rescue Elasticsearch::Transport::Transport::Errors::BadRequest => bad_request
  if !ignore_exists || bad_request.message !~ /IndexAlreadyExistsException/
    raise bad_request if _raise

    $stderr.puts "Cannot flush index #{bad_request}"
  end
end

# Not implemented yet; always aborts.
def do_reindex
  # http://babinho.net/2014/07/refresh-your-elasticsearch-index-with-zero-downtime/
  abort 'Come back later.'
end

# Prints a heading on stderr and pretty-prints the message's '_source' entry.
# A missing message or storage error is reported on stderr.
def do_info
  target = Ag::Utils.resolve_id

  begin
    doc = Ag::Storage.get($options.name, target)
    raise 'No such message' unless doc

    require 'pp'

    heading = "Message #{target}"
    underline = '-' * heading.length
    $stderr.puts heading, underline
    pp doc['_source']
  rescue => err
    $stderr.puts "Cannot display message: #{err}"
  end
end

###############################################################################

# Dispatch: the do_* methods above are defined at top level and are looked up
# through private_methods before being invoked by name.
unless private_methods.include?($options.action)
  abort "Internal Error: Unknown action: #{$options.action}"
end

send($options.action)

# vim: ts=2 sts=2 et ft=ruby: