#!/usr/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true

# This filter changes all words to Title Caps, and attempts to be clever
# about *un*capitalizing small words like a/an/the in the input.
#
# The list of "small words" which are not capped comes from
# the New York Times Manual of Style, plus 'vs' and 'v'.
#
# Original Perl version by:
#   John Gruber
#   http://daringfireball.net/
#   10 May 2008
#
# Adapted to Ruby by:
#   Marshall Elfstrand
#   http://vengefulcow.com/
#   21 May 2008
#
# License: http://www.opensource.org/licenses/mit-license.php
#
# Usage: pipe text through the script; every line read from standard input
# is written to standard output in title case.

# Title-casing rules, kept in a module so they can be called directly
# (TitleCase.titlecase) as well as through the command-line filter below.
module TitleCase
  # Regex fragments (not plain strings) for the words that stay lowercase
  # unless they start or end a segment.
  SMALL_WORDS = %w(
    a an and as at but by en for if in of on or the to v[.]? via vs[.]?
  ).freeze

  # Alternation of all small words, interpolated into the patterns below.
  SMALL_WORD_PATTERN = SMALL_WORDS.join('|').freeze

  # Splits a line after sentence-like punctuation followed by a space, and at
  # an opening double quote, so each piece is treated as its own "title".
  # The capture group keeps the separators in the split result.
  SEGMENT_SEPARATOR = /( [:.;?!][ ] | (?:[ ]|^)["“] )/x

  # A letter followed only by lowercase letters, dots and apostrophes.
  # Words with capitals after the first letter never match and are left as is.
  CAPITALIZABLE_WORD = / \b( [[:alpha:]] [[:lower:].'’]* )\b /x

  # A dot between two letters, e.g. "del.icio.us" or "example.com".
  INLINE_DOT = / [[:alpha:]] [.] [[:alpha:]] /x

  SMALL_WORD          = /\b(#{SMALL_WORD_PATTERN})\b/i
  LEADING_SMALL_WORD  = /\A([[:punct:]]*)(#{SMALL_WORD_PATTERN})\b/i
  TRAILING_SMALL_WORD = /\b(#{SMALL_WORD_PATTERN})([[:punct:]]*)\Z/i

  module_function

  # Converts one line of text to title case.
  #
  # @param input_line [String] the text to convert; a trailing newline, if
  #   present, is preserved. The argument is not modified.
  # @return [String] a new string holding the title-cased line
  def titlecase(input_line)
    line = input_line.split(SEGMENT_SEPARATOR)
                     .map { |segment| titlecase_segment(segment) }
                     .join

    # Special cases:
    line = line.gsub(/ V(s?)\. /, ' v\1. ') # "v." and "vs."
    line = line.gsub(/(['’])S\b/, '\1s')    # 'S (otherwise: "the SEC'S decision")
    # "AT&T" and "Q&A" get tripped up by the self-contained small words
    # "at" and "a", so force them back to upper case.
    line.gsub(/\b(AT&T|Q&A)\b/i, &:upcase)
  end

  # Title-cases a single segment produced by splitting on SEGMENT_SEPARATOR.
  #
  # @param segment [String] one piece of the line (may be a bare separator)
  # @return [String] the segment with capitalization applied
  def titlecase_segment(segment)
    capitalized = segment.gsub(CAPITALIZABLE_WORD) do |word|
      # Skip words with inline dots, e.g. "del.icio.us" or "example.com".
      word =~ INLINE_DOT ? word : word.capitalize
    end

    # Lowercase our list of small words.
    lowered = capitalized.gsub(SMALL_WORD) { |word| word.downcase }

    # If the first word in the segment is a small word, capitalize it.
    lead_fixed = lowered.gsub(LEADING_SMALL_WORD) do
      "#{Regexp.last_match(1)}#{Regexp.last_match(2).capitalize}"
    end

    # If the last word in the segment is a small word, capitalize it.
    lead_fixed.gsub(TRAILING_SMALL_WORD) do
      "#{Regexp.last_match(1).capitalize}#{Regexp.last_match(2)}"
    end
  end
end

if __FILE__ == $PROGRAM_NAME
  # `$KCODE = "UTF-8"` has no effect on Ruby 1.9 and later; tag the input as
  # UTF-8 explicitly so it is compatible with the UTF-8 patterns above
  # regardless of the locale.
  $stdin.set_encoding(Encoding::UTF_8)
  $stdin.each_line { |input_line| puts TitleCase.titlecase(input_line) }
end