#!/usr/bin/ruby -w

require 'optparse'
require 'ostruct'
require 'net/http'
require 'uri'
require 'rexml/document'

include REXML

# Reference "now", captured once at start-up. Used further down for the
# time-zone check, for cache expiry and for the channel grid request.
NOW = Time.now
# Number of seconds in one day.
ONE_DAY = 86400

class Grabber
  # Name of the running script with any leading directory removed.
  PROGRAM = $0.sub( /^.*\//, '' )
  # Version and revision-control identification strings.
  VERSION = '1.10.0'
  RELEASE = '$Id: tv_grab_nl_upc,v 1.228 2011/04/05 07:33:12 ianmacd Exp $'
  # Address of the TV guide, embedded in the description string below.
  GUIDE = 'http://tvgids.upc.nl/TV/'
  DESC = 'UPC Cable TV, The Netherlands (%s)' % [ GUIDE ]
  # Home page of this grabber.
  URL = 'http://www.caliban.org/ruby/xmltv_upc.shtml'
  # NOTE(review): presumably the XMLTV capabilities reported to callers; not
  # referenced in this part of the file.
  CAPABILITIES = %w[ baseline manualconfig ]
  # Directory (~/.xmltv, expanded) under which Rating keeps its ratings list.
  WORKING_DIR = File.expand_path( '~/.xmltv' )
  # Scheme and host that guide request paths are appended to.
  BASE_URL = 'http://tvgids.upc.nl'
  # Seconds added to the inter-request sleep each time Grabber.get_page is
  # told to slow down by the server.
  BACKOFF_INCREMENT = 0.20


  # Checks to ensure we're in a sane environment before we do anything else.
  #
  # +options+ must respond to +quiet+; when it is true, the locale and
  # time-zone warnings are suppressed. Running as root always aborts the
  # program with exit status 6.
  #
  def Grabber.pre_checks(options)
    # Check that we're not running as root.
    #
    if Process.euid == 0
      $stderr.puts 'This program should not be run as root. Aborting...'
      exit 6
    end

    # Check for correct locale. Character classes in regular expressions behave
    # differently, depending on the locale.
    #
    lang = ENV['LANG']

    if lang != 'nl_NL' && ! options.quiet
      # ENV yields nil for an unset variable, so nil has to be ruled out
      # before any String method is called on the value.
      #
      lang = 'unset' if lang.nil? || lang.empty?

      $stderr.puts \
        'Warning: Your locale ($LANG) is %s, but nl_NL is recommended.' %
        [ lang ]
    end

    # Check for a local time zone that is different from the Netherlands,
    # because programme start and finish times will be wrong.
    #
    if NOW.zone !~ /^CES?T$/ && ! options.quiet
      $stderr.puts \
        'Warning: Your time zone is %s, not CET/CEST, so programme start and '\
        "end times\n         will probably be wrong." % [ NOW.zone ]
    end
  end


  # Create a directory, aborting the programme if the given path already exists
  # as some other file type.
  #
  # +dir+ is the path to create (parents included); +options.verbose+
  # enables a progress message on stderr. Exits with status 7 when +dir+
  # exists but is not a directory.
  #
  def Grabber.create_directory(dir, options)
    # An existing directory needs no further work.
    #
    return if File.directory?( dir )

    # Anything else already occupying the path is fatal.
    #
    if File.exist?( dir )
      $stderr.puts '%s exists, but is not a directory.' % [ dir ]
      exit 7
    end

    require 'fileutils'

    if options.verbose
      $stderr.puts 'Creating %s...' % [ dir ]
    end

    FileUtils.mkdir_p( dir )
  end


  # Build a grabber channel config file from scratch.
  #
  # Interactive: questions are printed on stdout and answers are read from
  # $stdin. +options.config+ names the file to (over)write. The list of
  # channels is fetched with Channel.get_available, and one
  # 'channel <number> <name>' line is written per accepted channel.
  #
  # End of input on $stdin is treated as an empty answer, i.e. the default
  # of the question being asked, so a closed stdin can no longer cause an
  # endless "Invalid response" loop.
  #
  def Grabber.configure(options)
    $stderr.puts 'Using config file %s.' % [ options.config ]

    # Create config file directory if necessary.
    #
    config_dir = File.dirname( options.config )
    Grabber.create_directory( config_dir, options )

    if File.exist?( options.config )
      unless File.file?( options.config )
        $stderr.puts '%s exists, but is not a regular file.' %
          [ options.config ]
        exit 7
      end

      # Deal with the case of an existing, non-zero byte config file.
      #
      unless File.zero?( options.config )
        print <<"EOF"
  The file #{options.config} already exists
  and is not empty. There is currently no support for altering an existing
  configuration, so you will have to reconfigure from scratch.
  
  Do you wish to overwrite the old configuration? [yes, no (default = no)] ?
EOF

        loop do
          # gets returns nil at end of input; fall back to the default.
          #
          case $stdin.gets || "\n"
          when /^(y|ye|yes)$/i
            break
          when /^(no?|\n)$/i	# 'No' is the default.
            puts 'Configuration aborted.'
            return
          else
            # Falling out of the case simply asks again.
            #
            puts "Invalid response. Please choose either 'yes' or 'no'."
          end
        end
      end

    end

    # Fetch the channel list without verbose output, but with warnings on.
    #
    opts = options.clone
    opts.verbose = false
    opts.quiet = false

    $stderr.puts 'Obtaining list of channel names and numbers...'
    channels = Channel.get_available( opts )[0]

    # Write a new configuration file.
    #
    count = 0
    File.open( options.config, 'w' ) do |f|
      all = false

      # Work through channels in ascending channel number order.
      #
      channels.sort { |a, b| a[0] <=> b[0] }.each do |num, name|

        unless all
          print 'Add channel %s (%s) [yes, no, all, none (default = yes)] ? ' %
            [ name, num ]

          # 'n' alone is not accepted, because it could mean 'no' or 'none'.
          #
          case $stdin.gets || "\n"
          when /^(y|ye|yes|\n)$/i
          when /^no$/i
            next
          when /^(a|al|all)$/i
            all = true
          when /^none?$/i
            break
          else
            puts "Invalid response. Please choose between 'yes', 'no', 'all' and 'none'."
            redo
          end

        end

        f.puts 'channel %s %s' % [ num, name ]
        count += 1
      end
    end

    puts 'Finished configuration of %d channel(s).' % [ count ]
  end

  # Read the grabber's configuration file.
  #
  # +file+ is any object whose +each+ yields configuration lines, +upc_cids+
  # maps channel numbers to UPC channel ids and +verbose+ enables progress
  # messages on stderr. Returns a Hash of channel number => Channel.
  #
  # Comment lines (starting with '#') and blank lines are skipped. Lines that
  # are not of the form '[channel ]<number> <name>' are reported on stderr
  # and skipped, rather than aborting with a NoMethodError.
  #
  def Grabber.read_config(file, upc_cids, verbose)
    $stderr.puts 'Parsing channel file...' if verbose

    channels = Hash.new

    file.each do |line|
      next if line =~ /^(#|$)/
      next if line.strip.empty?

      m = line.match( /^(?i:channel )?(\d+)\s+(.+)$/ )

      unless m
        $stderr.puts 'Warning: Ignoring malformed configuration line: %s' %
                     [ line.rstrip ]
        next
      end

      number = m[1].to_i
      channel = Channel.new( number, m[2].rstrip, upc_cids[number] )
      channels[channel.number] = channel
    end

    if verbose
      $stderr.puts '%d channel(s) found in configuration file.' %
                   [ channels.size ]
    end

    channels
  end


  # Fetch a page of HTML.
  #
  # +url+ is a parsed HTTP URI, +req+ the prepared Net::HTTP request for it
  # and +options+ supplies +user_agent+, +sleep+, +tries+, +verbose+, +quiet+
  # and +debug+. Returns the response body for an HTTP 200 reply and nil
  # otherwise, including when the connection has failed +tries+ times.
  #
  # Side effect: +options.sleep+ is raised by BACKOFF_INCREMENT every time
  # the server replies 503 'Please slow down'.
  #
  # Redirects are followed. The Location value is resolved against the
  # current URL, so relative redirects work, and the query string of the
  # redirect target is kept in the new request.
  #
  def Grabber.get_page(url, req, options)
    excepts = 0
    res = nil

    loop do
      begin
        if options.verbose
          # Work on a copy: URI#path returns the URI's own string, so
          # appending to it in place would alter +url+ for the caller and
          # for every retry.
          #
          uri = url.path.dup
          uri << '?' << url.query if url.query

          $stderr.puts '  Fetching %s ...' % [ uri ]
        end

        res = Net::HTTP.start( url.host, url.port ) do |http|
          req['User-Agent'] = options.user_agent
          http.request( req )
        end

        # Be nice (or not) to the server.
        #
        if options.verbose
          $stderr.puts "    Sleeping %.2f second(s)...\n\n" % [ options.sleep ]
        end

        sleep options.sleep

        if res.code == '503' && res.body == 'Please slow down'
          unless options.quiet
            $stderr.puts "  Warning: UPC is throttling me. Increasing sleep time to %.2f seconds..." % [ options.sleep + BACKOFF_INCREMENT ]
          end

          options.sleep += BACKOFF_INCREMENT
          redo
        end

        break unless res.key?( 'location' )

        # If we get an HTTP 3xx redirect, we need to follow it.
        #
        if options.debug
          $stderr.puts '  Following HTTP %s redirection...' % [ res.code ]
        end

        url = url.merge( res['location'] )
        req = Net::HTTP::Get.new( url.request_uri )
        redo

      rescue EOFError, Errno::ECONNREFUSED, Errno::ECONNRESET,
             Errno::ETIMEDOUT, Timeout::Error => e
        if ( excepts += 1 ) < options.tries
          unless options.quiet
            $stderr.puts "  '%s' whilst fetching page. Retry #%d..." %
              [ e, excepts ]
          end
          retry
        end

        return nil
      end
    end

    return nil if res.code != '200'

    res.body
  end


  # We're going to have to use String#size if String#bytesize isn't available.
  # This is for Ruby pre-1.8.7.
  #
  # Module#instance_methods returns Symbols from Ruby 1.9 onwards, so looking
  # for the String 'bytesize' in that list never succeeds there and the real
  # String#bytesize would be replaced by String#size. method_defined? gives
  # the right answer whether method names are Strings or Symbols.
  #
  unless String.method_defined?( :bytesize )
    String.module_eval( 'alias :bytesize :size' )
  end


  # URL-encode a string for use as a URL path.
  #
  # Every run of characters other than space, ASCII letters, digits, '_', '.'
  # and '-' is replaced by the upper-case %XX escape of each of its bytes;
  # spaces are then turned into '+'. Returns a new String.
  #
  def Grabber.url_encode(string)
    escaped = string.gsub( /([^ a-zA-Z0-9_.-]+)/ ) do |run|
      # One two-digit hex string per byte of the run.
      #
      hex_pairs = run.unpack( 'H2' * run.bytesize )
      hex_pairs.map { |pair| '%' + pair.upcase }.join
    end

    escaped.tr( ' ', '+' )
  end

end


# A class for film ratings.
#
class Rating
  # Base URL for dynamic rating look-ups.
  IMDB_URL = 'http://uk.imdb.com'
  # Locations of the static ratings list and of its compressed download.
  RATINGS_FILE = File.join( Grabber::WORKING_DIR, 'ratings.list' )
  RATINGS_FILE_GZ = File.join( Grabber::WORKING_DIR, 'ratings.list.gz' )
  # Cache file name; it differs by Ruby version because the serialisation
  # format chosen below differs.
  CACHE_FILE = RUBY_VERSION >= '1.9.0' ? 'rating.cache' : 'ratings_cache.yaml'

  # Serialiser used by load_cache and dump_cache: Marshal on Ruby 1.9 and
  # later, YAML before that.
  if RUBY_VERSION >= '1.9.0'
    SERIAL = Marshal
  else
    require 'yaml'
    SERIAL = YAML
  end

  # Counters for fresh look-ups that did and did not yield a rating.
  @@rated = 0
  @@not_rated = 0
  # Title => Rating entries, persisted between runs.
  @@cache = {}
  # Counters for cache hits that held a rating or a remembered failure.
  @@cache_positives = 0
  @@cache_negatives = 0
  # Lower-cased title => rating, filled from RATINGS_FILE on first use.
  @@ratings = {}

  # NOTE(review): @timestamp is not assigned anywhere in the visible part of
  # this file, so this reader looks like it always returns nil -- confirm.
  attr_reader :timestamp
  attr_accessor :rating, :ctime, :mtime


  # Obtain the film ratings file from IMDB.
  #
  # Does nothing while RATINGS_FILE exists and is at most 7 days old.
  # Otherwise the compressed list is fetched by anonymous FTP and unpacked
  # with the system's gunzip. +options.verbose+ and +options.debug+ control
  # progress and FTP debug output. If gunzip cannot be found, a hint is
  # printed and the program exits.
  #
  # See http://www.imdb.com/interfaces#plain for more information.
  #
  def Rating.get_ratings_list(options)
    up_to_date = File.exist?( RATINGS_FILE ) &&
                 ( NOW - File.mtime( RATINGS_FILE ) ) / ONE_DAY <= 7
    return if up_to_date

    $stderr.puts 'Attempting to download IMDB ratings list via FTP...' if options.verbose

    require 'net/ftp'

    Net::FTP.open( 'ftp.funet.fi' ) do |ftp|
      ftp.debug_mode = true if options.debug
      ftp.login
      ftp.chdir( '/pub/mirrors/ftp.imdb.com/pub' )
      ftp.passive = true
      ftp.getbinaryfile( 'ratings.list.gz', RATINGS_FILE_GZ, 1024 )
    end

    require 'shell'

    # Locate gunzip in our PATH; an exception is raised if it isn't there.
    #
    gunzip =
      begin
        Shell::CommandProcessor.new( Shell.new ).find_system_command( 'gunzip' )
      rescue Shell::Error::CommandNotFound
        $stderr.puts 'Please gunzip %s and then rerun.' % [ RATINGS_FILE_GZ ]
        exit
      end

    # Decompress ratings file.
    #
    if options.verbose
      $stderr.puts 'Decompressing ratings file...'
    end

    system( gunzip, '-f', RATINGS_FILE_GZ )
  end


  # Cache a film rating, if found, or the fact that one wasn't found.
  #
  # Stores a new Rating entry for +title+ in the cache and updates the
  # rated / not-rated counters. Returns the rating (e.g. 7/10) when one was
  # given, and nil when +rating+ is nil or false.
  #
  def Rating.cache_rating(title, options, rating)
    unless rating
      $stderr.puts '      Failed.' if options.verbose

      # Remember the failure, so that it can be recognised next time.
      #
      @@cache[title] = Rating.new( false )
      @@not_rated += 1

      return nil
    end

    $stderr.puts '      Succeeded: %s.' % [ rating ] if options.verbose

    entry = Rating.new( rating )
    @@cache[title] = entry
    @@rated += 1

    entry.rating
  end
 

  # Look up a film's rating on IMDB.
  #
  # Returns the rating as a String such as "7.4/10", or nil when no rating
  # could be determined (no match, a failed page fetch, or an honoured
  # negative cache entry).
  #
  # Look-up order:
  #
  # 1. The ratings cache. A cached failure is honoured unless
  #    +options.ignore_negatives+ is set.
  # 2. With +options.static_ratings+, the local ratings file.
  # 3. Otherwise, the IMDB web site.
  #
  # Results of steps 2 and 3 are stored in the cache via Rating.cache_rating.
  # +options.threads+, if set, is locked while the ratings file is read.
  #
  def Rating.imdb_rating(title, options)

    # Titles come from UPC as UTF-8, so we check for any accented alpha
    # characters in the title and, if we find them, convert the title to
    # Latin-1 (ISO-8859-1).
    #
    # NOTE(review): String#encode presumably raises for characters without a
    # Latin-1 equivalent; no such case is handled here -- confirm whether
    # titles can contain them.
    #
    if RUBY_VERSION >= '1.9.0'
      title = title.clone.encode( 'ISO-8859-1' )
    elsif title =~ /\xC3[\x80-\xBF]/
      title = title.unpack( 'U*' ).pack( 'c*' )
    end

    if options.verbose
      $stderr.puts '      Attempting to rate "%s"...' % [ title ]
    end

    if @@cache.key?( title )

      # Update timestamp on cache entry.
      #
      @@cache[title].mtime = Time.now

      if @@cache[title].rating
        if options.verbose
          $stderr.puts '      Found in cache: %s.' % [ @@cache[title].rating ]
        end
        @@cache_positives += 1

        return @@cache[title].rating
      end

      # No rating in the entry, so it records an earlier failed look-up.
      #
      $stderr.puts '      Negative result found in cache.' if options.verbose

      unless options.ignore_negatives
        @@cache_negatives += 1
        return
      end

      $stderr.puts '      Ignoring cached negative...' if options.verbose
    end

    if options.static_ratings
      rating = nil

      # If we're using threads, we have to wait for the first thread to
      # populate the ratings hash before we allow other threads to consult it.
      #
      options.threads.lock if options.threads

      begin
        # If static ratings are desired, create the hash of titles and ratings
        # from the static IMDB ratings file.
        #
        if @@ratings.empty?
          $stderr.puts '        Reading ratings file...' if options.verbose

          File.open( RATINGS_FILE ) do |f|
            body = false

            f.each_line do |l|
              l.force_encoding( 'ISO-8859-1' ) if RUBY_VERSION >= '1.9.0'

              # Get to the right point in the file.
              #
              body = true if l =~ /^MOVIE RATINGS REPORT/
              next unless body

              if m = l.match( /^\s+\S{10}\s+\d+\s+(\d+\.\d+)\s+(.+)$/ )

                # Remove year and any other trailing stuff from film title and
                # convert it to lower case.
                #
                t = m[2].sub( /\s*\(\d+\).*$/, '' ).downcase

                # Store the rating.
                #
                @@ratings[t] = m[1]
              end

            end

            if options.verbose
              $stderr.puts '        Read %d ratings.' % [ @@ratings.size ]
            end
          end
        end
      ensure
        # Release the lock even if the ratings file could not be read, so
        # that other threads are not left waiting for it.
        #
        options.threads.unlock if options.threads
      end

      # It's now safe to access the ratings hash.
      #
      l_title = title.downcase
      rating = @@ratings[l_title] + '/10' if @@ratings.key?( l_title )
      return Rating.cache_rating( title, options, rating )
    end

    # Otherwise, grab the rating dynamically from IMDB.
    #
    guide_title = title.clone
    res = nil
    rating_regex = /<span class="rating-rating">(\d+\.\d+)<span>(\/\d+)/

    loop do
      # When parsing the URL, we have to remove any ampersand entities in the
      # title.
      #
      url = URI.parse( IMDB_URL + '/find?q=%s;s=tt' %
                       [ Grabber.url_encode( title.gsub( /&amp;/, '&' ) ) ] )
      req = Net::HTTP::Get.new( url.path + '?' + url.query )
      return nil unless res = Grabber.get_page( url, req, options )

      # A modified version of the title will be used for matching.
      #
      # Firstly, any occurrence of '&amp;' in the title is changed to a literal
      # ampersand.
      #
      # Next, any non-alphanumeric characters are made optional, because these
      # may not be used to form the title in the IMDB database. The IMDB HTML,
      # may, however, use numeric entities to represent them, so we also have
      # to match against the possibility of those. This is further complicated
      # by the fact that the numeric entity may be decimal or hexadecimal.
      #
      # All ampersands and accented characters (ASCII A0 - FF) are then
      # allowed to match against their alphabetic or numeric HTML entity
      # equivalent.
      #
      # Finally, the word 'and' in the title is allowed to also match '&amp;'
      # or its (hexa)decimal entity equivalent, i.e. &#x26; or &#38;.
      #
      # Note that the # character must be escaped, as we will be interpolating
      # it into the /x style regex that follows, where it would otherwise be
      # treated as a comment character.
      #
      # We must also refer to the ampersand as \x26 in the second gsub's
      # substitution string, because a literal ampersand would be replaced by
      # the third gsub.
      #
      title.force_encoding( 'ASCII-8BIT' ) if RUBY_VERSION >= '1.9.0'
      mod_title = title.gsub( /&amp;/, '&' ).
                        gsub( /[^&[:alnum:]\xA0-\xFF]/,
                              '(?:.?|\x26\#(x[[:xdigit:]]+|\d+);)' ).
                        gsub( /[&\xA0-\xFF]/,
                             '&(?:\#(x[[:xdigit:]]+|\d+)|[[:alpha:]]+);' ).
                        gsub( /and/i, '(?:and|&\#(x26|38);|&amp;)' )

      if m = res.match( /
          # Most film titles return 'Popular Titles', but occasionally one
          # will return just 'Exact Matches' (or even 'Approx Matches', which
          # we ignore).
          #
          <p><b>(?:Popular\sTitles |
                   Titles\s\(Exact\sMatches\)
                )
             <\/b>\s+
          
          # Now we can grab the URL path of the first film in the list, which
          # may not actually be the right one if the film has been remade.
          #
          \(Displaying\s\d+\sResults?\).+?<a\shref="([^"]+)"[^>]*>

          # But we must also make sure that we really did match the same title,
          # not a substring of some other title. To achieve a slightly fuzzy
          # match, we match case-insensitively, using an embedded pattern-match
          # modifier (i.e. (?i:pattern) ) and a modified version of the title.
          #
          # This can be done, because the interpolation of the result from the
          # code below takes place before the regex is matched.
          #
          (?:
            # At this point, we can match either directly on the title:
            # 
            (?i:#{mod_title})<\/a> |

            # Or the film may have originally had a foreign title, in which
            # case it may be followed by an 'a.k.a.' line with the translated
            # title that we're looking for:
            #
            [^<]+<\/a>[^<]+<br>.+?aka\s<em>"(?i:#{mod_title})"<\/em>
          )
        /x )

        # Now we follow the URL path to get the ratings page.
        #
        url = URI.parse( IMDB_URL + m[1] )
        req = Net::HTTP::Get.new( url.path )
        return nil unless res = Grabber.get_page( url, req, options )

        # Extract the rating from the page.
        #
        break

      elsif res !~ rating_regex && title.sub!( /, (A|The)$/, '' )
        # If we haven't just fetched the film's ratings page and its title
        # ends with ', A' or ', The', we can try again with this string at the
        # beginning of the title instead.
        #
        title = $1 + ' ' + title
        if options.debug
          $stderr.puts '        Reattempting as "%s"...' % [ title ]
        end

        redo
      else
        # We must be on the ratings page.
        #
        break
      end

    end

    # At this point, we should have a page that contains a rating for this
    # film.
    #
    m = res.match( rating_regex )
    Rating.cache_rating( guide_title, options, m ? m[1] + m[2] : nil )

  end


  # Load the film rating cache from disc.
  #
  # Reads the cache from +options.ratings+ with SERIAL, then drops every
  # entry whose mtime is more than 7 days before NOW. Progress goes to
  # stderr, depending on +options.verbose+ and +options.debug+. A missing
  # cache file is reported and otherwise ignored.
  #
  def Rating.load_cache(options)
    unless File.exist?( options.ratings )
      $stderr.puts 'No ratings cache file found.'
      return
    end

    $stderr.puts 'Loading ratings cache file...' if options.verbose
    loaded = File.open( options.ratings ) { |f| SERIAL.load( f ) }
    @@cache = loaded || {}

    # Expire any cache entries older than a week.
    #
    orig_size = @@cache.size

    $stderr.puts 'Removing expired ratings cache entries...' if options.debug
    @@cache.delete_if { |_title, entry| ( NOW - entry.mtime ) / ONE_DAY > 7 }

    # Split what is left into entries with and without a rating.
    #
    positive, negative = @@cache.values.partition { |entry| entry.rating }

    if options.verbose
      $stderr.puts '%d rating(s) expired. %d remaining, of which %d positive and %d negative.' %
        [ orig_size - @@cache.size, @@cache.size, positive.size, negative.size ]
    end
  end


  # Dump the film ratings cache to disc.
  #
  # Serialises the whole cache to +options.ratings+ with SERIAL, replacing
  # any previous contents. +options.verbose+ enables progress messages.
  #
  def Rating.dump_cache(options)
    if options.verbose
      $stderr.puts 'Dumping ratings cache file...'
    end

    File.open( options.ratings, 'w' ) do |f|
      SERIAL.dump( @@cache, f )
    end

    $stderr.puts '%d ratings cache entries written.' % [ @@cache.size ] if options.verbose
  end


  # Number of look-ups in this run that produced a rating (counted in
  # Rating.cache_rating).
  #
  def Rating.rated
    @@rated
  end


  # Number of look-ups in this run that produced no rating (counted in
  # Rating.cache_rating).
  #
  def Rating.not_rated
    @@not_rated
  end


  # Number of times Rating.imdb_rating answered from a cache entry that held
  # a rating.
  #
  def Rating.cache_positives
    @@cache_positives
  end


  # Number of times Rating.imdb_rating gave up because of a cached failure
  # (not counted when cached failures are being ignored).
  #
  def Rating.cache_negatives
    @@cache_negatives
  end


  # Creates a cache entry. +rating+ is the rating to remember, or false for
  # a failed look-up. Creation and modification times both start out as the
  # same current time.
  #
  def initialize(rating)
    created = Time.now
    @ctime = created
    @mtime = created
    @rating = rating
  end
end


# A class for television programmes.
#
class Programme

  # Category translations.
  #
  # Maps the guide's Dutch genre names to English category names.
  #
  # NOTE(review): every key is lower case except 'Klussen'; if look-ups are
  # done with a lower-cased genre, that entry can never match -- confirm
  # against the code that consults this hash (not visible here).
  #
  CATEGORIES = { 'actie'	    => 'Action',
  		 'algemeen'	    => 'Misc',
  		 'atletiek'	    => 'Sports',
  		 'avontuur'	    => 'Action',
  		 'beeldende kunst'  => 'Arts/Culture',
  		 'detective'	    => 'Crime/Mystery',
  		 'documentaire'	    => 'Documentary',
  		 'drama'	    => 'Drama',
  		 'educatie'	    => 'Educational',
  		 'erotiek'	    => 'Adult',
  		 'extreme'	    => 'Sports',
  		 'gevechtssport'    => 'Sports',
  		 'gezondheid'	    => 'Health/Medical',
  		 'historisch'	    => 'History',
  		 'kids / jeugd'	    => 'Children',
  		 'kids/jeugd'       => 'Children',
  		 'Klussen'	    => 'HowTo',
  		 'koken'	    => 'Food',
  		 'komedie'	    => 'Comedy',
  		 'kunst / cultuur'  => 'Arts/Culture',
  		 'kunst/cultuur'    => 'Arts/Culture',
  		 'lifestyle'	    => 'Educational',
  		 'melodrama'	    => 'Soaps',
  		 'militair'	    => 'War',
  		 'mode'		    => 'Educational',
  		 'motorsport'	    => 'Sports',
  		 'musical'	    => 'Movies',
  		 'muziek'	    => 'Art/Music',
  		 'nieuws'	    => 'News',
  		 'paardensport'	    => 'Sports',
  		 'religie'	    => 'Spiritual',
  		 'romantiek'	    => 'Romance',
  		 'sci-fi'	    => 'SciFi/Fantasy',
  		 'show'		    => 'Game',
  		 'show/spelshow'    => 'Game',
  		 'speelfilm'	    => 'Movies',
  		 'sport'	    => 'Sports',
  		 'sportmagazine'    => 'Sports',
  		 'talkshow'	    => 'Talk',
  		 'teamsporten'	    => 'Sports',
  		 'technologie'	    => 'Science/Nature',
  		 'tekenfilms'	    => 'Children',
  		 'tennis / squash'  => 'Sports',
  		 'theater / dans'   => 'Arts/Culture',
  		 'thriller'	    => 'Crime/Mystery',
  		 'tuinieren'	    => 'HowTo',
  		 'voetbal'	    => 'Sports',
  		 'watersport'	    => 'Sports',
  		 'wintersport'	    => 'Sports',
  		 'vrije tijd'	    => 'Educational' }

  # File name for the programme cache.
  CACHE_FILE = 'programme.cache'
  # Matches an ampersand that does not already start an '&amp;' entity.
  LITERAL_AMPERSAND = /&(?!amp;)/

  # Programme ID => Programme, persisted between runs, and the number of
  # programmes added to it during this run.
  @@cache = {}
  @@cached = 0

  # NOTE(review): @category and @rating are assigned in get_detail, but no
  # assignment to @subcategory is visible in this file, so that reader
  # appears to return nil -- confirm.
  attr_reader :actors, :category, :desc, :directors, :episode, :presenters,
	      :rating, :subcategory, :subtitle, :time, :title
  attr_accessor :start_time, :stop_time


  # Load the programme cache from disc.
  #
  # Restores the cache from the Marshal data in +options.cache+. A missing
  # file is reported on stderr and leaves the cache as it is.
  # +options.verbose+ enables progress messages.
  #
  def Programme.load_cache(options)
    unless File.exist?( options.cache )
      $stderr.puts 'No programme cache file found.'
      return
    end

    $stderr.puts 'Loading programme cache file...' if options.verbose

    restored = File.open( options.cache ) { |f| Marshal.load( f ) }
    @@cache = restored || {}

    return unless options.verbose

    $stderr.puts '%d programme cache entries found.' % [ @@cache.size ]
  end


  # Dump the programme cache to disc.
  #
  # First removes every cached programme whose stop_time lies before the
  # cut-off, i.e. NOW minus the seconds elapsed since the last whole
  # multiple of ONE_DAY in epoch time. What remains is then written to
  # +options.cache+ with Marshal. +options.verbose+ and +options.debug+
  # control the progress messages.
  #
  def Programme.dump_cache(options)
    before = @@cache.size
    cutoff = NOW - ( NOW.to_i % ONE_DAY )

    # Expire any cache entries that end earlier than the cut-off.
    #
    $stderr.puts 'Removing expired programme cache entries...' if options.debug
    @@cache.delete_if { |_prog_id, programme| programme.stop_time < cutoff }

    if options.verbose
      $stderr.puts '%d cached programmes(s) expired and %d remaining.' %
                   [ before - @@cache.size, @@cache.size ]
      $stderr.puts 'Dumping programme cache file...'
    end

    File.open( options.cache, 'w' ) do |f|
      Marshal.dump( @@cache, f )
    end

    $stderr.puts '%d programme(s) written to cache.' % [ @@cache.size ] if options.verbose
  end


  # Obtain a list of a day's television programmes from a TV guide page.
  #
  # +guide+ is the HTML of a guide page, or nil. Returns an Array with one
  # Programme per listing found, in page order; the Array is empty when
  # +guide+ is nil or nothing matches.
  #
  def Programme.get_programme_list(guide)
    return [] if guide.nil?

    # Captures start time, EID (unique programme ID) and the programme URL
    # path of each listing.
    #
    listing = /<tr>\s+<th\sclass="event[-_]starttime">\s+
               <span>(\d+:\d+)<\/span>\s+
               <\/th>\s+
               <td\sclass="event-name"\sid="([^"]+)">\s+
               <a\shref="([^"]+)">
              /mx

    guide.scan( listing ).map do |start_time, prog_id, url_path|
      Programme.create( start_time, prog_id, url_path )
    end
  end


  # Instantiate a new Programme object or return an existing one if the
  # programme has been cached.
  #
  # Returns the cached Programme for +prog_id+ when there is one; otherwise
  # a new Programme built from +prog_id+, +start_time+ and +url_path+.
  #
  def Programme.create(start_time, prog_id, url_path)
    cache_ready = class_variable_defined?( :@@cache )
    return @@cache[prog_id] if cache_ready && @@cache.key?( prog_id )

    new( prog_id, start_time, url_path )
  end


  # Number of times Programme#cache has been called during this run.
  #
  def Programme.cached
    @@cached
  end


  # +prog_id+ is the guide's programme ID and +start_time+ the start time as
  # scraped (HH:MM). Everything in +url_path+ up to and including its last
  # underscore is discarded; the remainder becomes the startDateTime of the
  # grid request path. Presenters, directors and actors start out as empty
  # lists and the rating as nil.
  #
  def initialize(prog_id, start_time, url_path)
    @prog_id, @time = prog_id, start_time
    @url_path = '/TV/wa/grid/?startDateTime=' + url_path.sub( /^.+_/, '' )
    @presenters, @directors, @actors = [], [], []
    @rating = nil
  end


  # Stores this programme in the class-wide cache under its programme ID and
  # bumps the count reported by Programme.cached. The count goes up on every
  # call, even if the ID was already present.
  #
  def cache
    @@cache[@prog_id] = self
    @@cached += 1
  end


  # True when the cache holds an entry for this programme's ID.
  #
  def cached?
    @@cache.key?( @prog_id )
  end


  # Obtain the title, broadcast time, description and category of a programme.
  #
  # Two pages are fetched with Grabber.get_page: first the grid page named by
  # @url_path, from which the link to the programme's detail page is taken,
  # then the detail page itself. Title, description, category, time span,
  # directors, actors, censor rating and presenters are filled in from it.
  #
  # +timescope+ is interpolated into the regex that looks for the broadcast
  # time on the detail page. +options+ supplies +quiet+ and +debug+, and is
  # passed on to Grabber.get_page.
  #
  # Returns 1 when the detail page was fetched and parsed, 0 otherwise.
  #
  def get_detail(timescope, options)
    fetched = 0

    # NOTE(review): the request is built from url.path alone, so the
    # startDateTime query in @url_path is not sent, whereas
    # Channel.get_available sends path and query -- confirm this is intended.
    url = URI.parse( Grabber::BASE_URL + @url_path )
    req = Net::HTTP::Get.new( url.path )

    return 0 unless interim = Grabber.get_page( url, req, options )

    # Find the first link that follows this programme's ID on the grid page.
    if m = interim.match( /
	     id="#{@prog_id}">\s+
	     .*?
	     <a\shref="([^"]+)">
	     /mx )
      detail_url = m[1]
    else
      #if options.ignore_errors
	#unless options.quiet
	  #$stderr.puts 'Warning: Ignoring a programme data error...'
	#end

	#return 0
      #end

      $stderr.puts 'Warning: Error while scanning page for programme details.'
      $stderr.puts "Programme ID was #{@prog_id}."
      #$stderr.puts "Page was:\n\n"
      #$stderr.puts interim
      #$stderr.puts "\nPlease include this text when reporting a problem."
      #exit 9
      return 0
    end

    url = URI.parse( Grabber::BASE_URL + detail_url )
    req = Net::HTTP::Get.new( url.path )

    return 0 unless detail = Grabber.get_page( url, req, options )

    # Needed for Ruby 1.9.x.
    #
    detail.force_encoding( 'UTF-8' ) if RUBY_VERSION >= '1.9.0'

    # subcategory and duration are locals: only duration is used again (for
    # the end-time calculation below); subcategory is not stored.
    begin
      @title, @desc, @category, subcategory, duration = 
	detail.match( /
          <div\sid="programme_name">([^<]+)<\/div>\s+	# Title
          <div\sid\s=\s"programme_desc_text">
	    ([^<]+)					# Description
	  <\/div>\s+
	  .+?
	  <dt>Genre:<\/dt><dd>([^<]+)<\/dd>\s+		# Category
	  <dt>Subgenre:<\/dt><dd>([^<]+)<\/dd>.+?	# Subcategory
	  <dt>Duur:<\/dt><dd>(\d+)\smin<\/dd>		# Duration
	  /mx )[1..5]

      # Description may be right-padded with spaces.
      #
      @desc.rstrip!

    rescue NoMethodError

      # We probably tried to call [] on NilClass, which would mean that our
      # regex didn't match the program details. The cause is probably that a
      # page of Web server errors was returned instead of the expected page.
      #
      # On the other hand, the programme may be missing a duration.
      #
      unless detail =~ /<dt>Duur:<\/dt><dd>(\d+)\smin<\/dd>/
	unless options.quiet
	  $stderr.puts 'Warning: Programme has no end time or duration. Ignoring...'
	end

	return 0
      end

      #if options.ignore_errors
	#unless options.quiet
	  #$stderr.puts 'Warning: Ignoring a programme data error...'
	#end

	#return 0
      #end

      $stderr.puts "Warning: Error while scanning page for programme details."
      $stderr.puts "Programme ID was #{@prog_id}."
      #$stderr.puts detail
      #$stderr.puts "\nPlease include this text when reporting a problem."
      #exit 9
      return 0
    end

    fetched += 1

    # Look for 'start - end' for this broadcast; @time still holds just the
    # start time at this point.
    if m = detail.match(
	     /
	      <span\sclass="datetime"><a\shref=".+?\/#{timescope}\/">
	      (#{@time}\s+-\s+\d+:\d+)
	     /x )
      @time = m[1]

    else
      if options.debug
	$stderr.puts '    Broadcast unknown on programme detail page.',
		     '    End time calculated from start time and duration.'
      end

      # If we reached here, it means that this particular broadcast isn't
      # known on the programme page referred to by the channel's day
      # programming page.
      #
      # That means we can't parse the end time of the broadcast, so we'll
      # have to calculate it from the start time and duration.
      #
      # Note that m is reused here for the minutes; hours wrap at 24.
      h, m = @time.split( ':' )
      m = m.to_i + duration.to_i
      h = ( h.to_i + m / 60 ) % 24
      m %= 60

      @time = '%s - %02d:%02d' % [ @time, h, m ]
    end

    if options.debug
      $stderr.puts <<"EOF"
    Title: #{@title}
    Programme ID: #{@prog_id}
    Time: #{@time}
    Category: #{@category}
EOF
    end

    # Credits and censor rating are only looked for when there is a
    # description.
    unless @desc.empty?

      # Check for directors.
      #
      if m = detail.match( /<dt>Regie:<\/dt><dd>([^<]+)<\/dd>/ )
	@directors = m[1].split( ', ' )

	if options.debug
	  $stderr.puts "    Directors: #{@directors.inspect.delete( '"' )}"
	end
      end

      # Check for actors.
      #
      if m = detail.match( /<dt>(?:Cast|Met):<\/dt><dd>([^<]+)<\/dd>/ )
	@actors = m[1].split( ', ' )

	if options.debug
	  $stderr.puts "    Actors: #{@actors.inspect.delete( '"' )}"
	end
      end

      # Check for censor board rating.
      #
      if m = detail.match( /<dt>(?:Kijkwijzer|Leeftijd):<\/dt><dd>([^<]+)<\/dd>/ )
	@rating = m[1]

	if options.debug
	  $stderr.puts "    Censor board rating: #{@rating}"
	end
      end
    end

    # Can we figure out whether there's a presenter?
    #
    # We isolate from 'Presented by ' to the first full-stop.
    #
    # The 'pres != desc' check is required in order to know whether
    # substitution actually occurred.
    #
    if ( pres = @desc.sub( /^.*(?:(?:Presented|Hosted) by|Gepresenteerd door) (.+?)\..*$/i, "\\1" ) ) && pres != @desc

      # Grab each string of consecutive words that start with a capital
      # letter. That should give us an array of names, each of which
      # possibly has a trailing space.
      #
      @presenters =
	pres.scan( /((?:[[:upper:]](?:'[[:upper:]])?[[:lower:]]+ ?)+)/ ).
	  flatten
      @presenters.each { |presenter| presenter.rstrip! }

      if options.debug
	$stderr.puts "    Presenters: #{presenters.inspect.delete( '"' )}"
      end

    end

    $stderr.puts if options.debug

    fetched
  end


  # Derive subtitle, episode number, etc. for a programme.
  #
  # Works on @title and @desc in place (both must already be set) and may
  # set @subtitle and @episode:
  #
  # - a title containing a colon is split into title and subtitle;
  # - failing that, a leading sentence of the description that consists of
  #   capitalised words is moved into the subtitle;
  # - a subtitle that merely repeats the title is dropped, and its trailing
  #   full-stop is removed when +options.prune_subs+ is set;
  # - a trailing 'deel/part/episode N' is moved from the description to the
  #   subtitle, and N is kept as @episode;
  # - finally, bare ampersands in title, subtitle and description are
  #   turned into '&amp;'.
  #
  def derive_fields(options)

    # Remove any unhelpful text from the end of the description.
    #
    #@desc.sub!( / ?Niet beschikbaar$/, '' )
  
    # If the title features a colon, make the left-hand side the title and the
    # right-hand side the subtitle, except when the colon appears to be the
    # separator in a time, i.e. HH:MM. Otherwise, keep the title as is.
    #
    @title, @subtitle = @title.split( /:+\s*(?!\d)\s*/, 2 )
  
    unless @subtitle
  
      # We still don't have a subtitle, so check whether the first sentence of
      # the description contains only words that begin with a capital letter.
      #
      # We also allow digits and punctuation, so that we can recognise cardinal
      # and ordinal numbers, complex sentences, names containing initials, etc.
      #
      # The desc.match pattern is a short-circuit operation that quickly
      # enables us to determine whether the description can possibly contain a
      # usable description. This saves time by not applying the complex regex
      # unless necessary. Otherwise, there's a chance the program will get
      # stuck performing exponential backtracking.
      #
      # An example of a problematic description that would cause the program to
      # get stuck is the following:
      #
      # 1300BST: US PGA Tour Golf, 1400BST: Challenge Series Golf, 1530BST: WTA
      # Tennis, 1600BST: ICC Cricket, 1630BST: ATP Tennis.
      #

      # Needed for Ruby 1.9.x.
      #
      @desc.force_encoding( 'ASCII-8BIT' ) if RUBY_VERSION >= '1.9.0'

      if @desc.match( /[.!?]+\s+/ ) &&
	 @subtitle = @desc.match(
	/^(
  	 (?:
  	    (?:
  	       # Start with a single digit or capital letter...
  	       #
  	       [\d[:upper:]]
  
  	       (?:
  		  (?:
  		     # ...followed by zero or more of:
  		     #
  		     # - alphanumeric characters
  		     # - various punctuation characters, or...
  		     #
  		     [[:alpha:]\d"\#$%'),\-\/:;\]\}] |
  
  		     # - a UTF-8 accented alpha character. See:
  		     #   www.utf8-chartable.de unicode-utf8-table.pl
  		     #
  		     (?:\xC3[\x80-\xBF])
  		  )*
  		  (?:
  		     # Alternatively, this may catch Ep. 27 style episode
  		     # numbering...
  		     #
  		     \.?\s\d+
  
  		  )? |
  
  		  # ...and this might catch initials in names.
  		  #
  		  \.?
  	       ) |
  	       (?:
  		  # If it's not a word, these are the likely word
  		  # separators...
  		  #
  		  [\s&(*+\-<=>\[{] |
  
  		  # or we may find some HTML entities, lower-case articles,
  		  # conjunctions or prepositions...
  		  #
  		  # ...either in English...
  		  #
  		  &amp;|'n'|a|an|and|for|in|of|on|or|the|to|
  
  		  # ...or in Dutch:
  		  #
  		  de|en|het|naar|tot|van|voor|vs
  	       )+
  	    )
  	 )+
  
  	 # End with one or more full-stops, exclamation marks or question
  	 # marks, followed by one or more spaces.
  	 #
  	 [.!?]+\s+
	 )
	/x )
  
	# Remove the subtitle from the start of the description.
	#
	# @subtitle is still the MatchData here; it becomes a String on the
	# next line.
	@desc.slice!( @subtitle[1] )
	@subtitle = @subtitle[1].rstrip
      end

    end
  
    if @subtitle
      # Sometimes, the subtitle will simply duplicate the programme title
      # (except, perhaps, for a trailing full-stop). In this case, we abandon
      # the subtitle.
      #
      pruned_sub = @subtitle.chomp( '.' )
  
      if pruned_sub == @title
	@subtitle = nil
  
      # Prune any trailing full-stop from subtitle if requested, but be careful
      # not to prune the last dot of an ellipsis.
      #
      elsif options.prune_subs && pruned_sub.chomp( '.' ) == pruned_sub
	@subtitle = pruned_sub
      end
    end
  
    # Detect trailing episode number in the description and use this as
    # subtitle if available. If we already have a subtitle, append the episode
    # number to increase the likelihood of uniqueness.
    #
    # @episode holds the MatchData first and is reduced to an Integer below.
    if @episode = @desc.match( /( (?:deel|part|episode) (\d+)\.?)$/i )
      @desc.slice!( @episode[1] )
      @subtitle = ( ( @subtitle || '' ) + @episode[1] ).lstrip
  
      # Remember just the episode number.
      #
      @episode = @episode[2].to_i
    end
  
    # Escape ampersands that are not already part of an '&amp;' entity.
    @title.gsub!( LITERAL_AMPERSAND, '&amp;' )
    @subtitle.gsub!( LITERAL_AMPERSAND, '&amp;' ) if @subtitle
    @desc.gsub!( LITERAL_AMPERSAND, '&amp;' )

    # Tag the strings as UTF-8 again; @desc may have been marked as binary
    # above.
    if RUBY_VERSION >= '1.9.0'
      @title.force_encoding( 'UTF-8' )
      @subtitle.force_encoding( 'UTF-8' ) if @subtitle
      @desc.force_encoding( 'UTF-8' )
    end
  end

end


# Models one television channel as UPC presents it: a channel number, a
# display name and the short channel ID that appears in UPC's guide URLs.
#
class Channel

  attr_reader :number, :cid, :name


  # Download UPC's guide grid and extract every channel listed on it.
  #
  # Returns a two-element array of hashes, both keyed on the integer channel
  # number: the first maps to the channel name, the second to the channel ID.
  # The program exits with status 5 if the page cannot be fetched or if no
  # channel can be found on it.
  #
  def Channel.get_available(options)
    stamp = NOW.strftime( '%Y-%m-%dT%H:%M:%SZ' )
    url = URI.parse( '%s/TV/wa/grid/?startDateTime=%s' %
                     [ Grabber::BASE_URL, stamp ] )

    req = Net::HTTP::Get.new( "#{url.path}?#{url.query}" )
    page = Grabber.get_page( url, req, options )

    if page.nil?
      $stderr.puts 'Failed to fetch channel list from UPC. Aborting...' \
        unless options.quiet

      exit 5
    end

    # Ruby 1.9.x and later need to be told the encoding of the page.
    #
    page.force_encoding( 'UTF-8' ) if RUBY_VERSION >= '1.9.0'

    # Channel number => name, and channel number => channel ID.
    #
    upc_channels = {}
    upc_cids = {}

    channel_entry =
      /<li\sclass="channel_info"\sid="channel_(\d+)">\s+
       <a\sclass="channel_logo"\stitle="([^"]+)"\s+
        href=".+?\/(..)\/[^\/]+\/">
      /mx

    page.scan( channel_entry ).each do |num, name, cid|
      key = num.to_i

      # Names may be right-padded with spaces. Ampersand entities become
      # literal ampersands and slashes become backslashes.
      #
      upc_channels[key] =
        name.rstrip.gsub( /&amp;/, '&' ).gsub( /\//, '\\' )
      upc_cids[key] = cid
    end

    if upc_channels.empty?
      $stderr.puts 'No channels found at UPC. Aborting...' \
        unless options.quiet

      exit 5
    end

    if options.verbose
      $stderr.puts 'UPC nominally has data for %d channels.' %
        [ upc_channels.size ]

      if options.debug
        $stderr.puts "Those channels are:\n\n"

        # List by channel name, then by channel number, because some sport
        # channels are identically named.
        #
        by_name = upc_channels.sort_by do |num, name|
          [ name.downcase, num ]
        end

        by_name.each do |num, name|
          $stderr.puts "%3d %s" % [ num, name ]
        end

        $stderr.puts
      end
    end

    [ upc_channels, upc_cids ]
  end


  # Remember the channel's number, name and channel ID.
  #
  def initialize(number, name, cid)
    @number, @name, @cid = number, name, cid
  end


  # Warn on stderr if this channel's name is not among the values of the
  # given hash of UPC channels, and suggest every UPC name that contains the
  # leading alphabetic part of our name.
  #
  def check(upc_channels)
    known = upc_channels.values

    unless known.include?( @name )
      $stderr.puts 'Warning: UPC may not know of the channel called %s.' %
        [ @name ]

      # Everything from the first non-alphabetic character onwards is
      # dropped; an empty stub yields no suggestions.
      #
      stub = @name.sub( /[^[:alpha:]].*$/, '' )
      usable = stub.length > 0

      known.each do |upc_name|
        next unless usable && upc_name.index( stub )
        $stderr.puts 'Perhaps you mean %s.' % [ upc_name ]
      end
    end
  end


  # Fetch the TV guide page of this channel for the given day string.
  #
  def get_tv_guide(day, options)
    Guide.new( name, cid, day, options )
  end


  # Extract the URL path of the channel's logo from a guide page.
  #
  # When the page has no logo image, the failed match is nil and indexing
  # it raises NoMethodError.
  #
  def icon_path(guide)
    logo = guide.match( /<img\s+alt="[^"]+"\s
                         id="channel[-_]logo"\s
                         src="([^"]+)"
                        /mx )
    logo[1]
  end

end


# A TV programme guide page, held as a String containing the page's HTML.
#
class Guide < String

  # Fetch the guide page of the named channel for the given day string and
  # use its HTML as this string's content.
  #
  # If the fetch yields nil, the content is never set; callers in this file
  # detect that case with empty?.
  #
  def initialize(channel, cid, day, options)
    path = '/TV/Guide/Channel/%s/%s/%s/' %
           [ Grabber.url_encode( channel ), cid, day ]
    uri = URI.parse( Grabber::BASE_URL + path )

    request = Net::HTTP::Get.new( uri.path )
    html = Grabber.get_page( uri, request, options )

    super( html ) unless html.nil?
  end

end


# A class for determining programme options.
#
# Every option has a default, which is stored on an OpenStruct before the
# corresponding switch is defined so that the default can be shown in the
# usage message.
#
class ProgramOptions

  # Parse the given argument list (which is modified in place: recognised
  # switches are removed) and return an OpenStruct holding one attribute per
  # option.
  #
  # The informational switches (--capabilities, --description, --help and
  # --version) print their text and exit straight away. Unknown or malformed
  # switches raise the usual OptionParser exceptions.
  #
  def self.parse(args)
    o = OpenStruct.new

    op = OptionParser.new do |opt|
      opt.banner = "Usage: #{Grabber::PROGRAM} [options]"

      # The grabber's version lives in Grabber; there is no VERSION
      # constant in this class.
      #
      opt.version = Grabber::VERSION

      opt.define( "\n  All options may be specified using the shortest " +
                  'unique string.' )
      opt.define( '  For example, --output-file may be abbreviated to ' +
                  "--output or even --out.\n")
      opt.define( "  See #{Grabber::URL} for further details.\n" )

      o.actual = true
      opt.define( '--[no-]actual',
                  'Ignore programmes that have already ended.',
                  "(default: #{o.actual})" ) do |actual|
        o.actual = actual
      end

      # Capabilities are defined here:
      #
      # http://www.xmltv.org/wiki/xmltvcapabilities.html
      #
      opt.define( '--capabilities',
                  "List this grabber's capabilities and exit." ) do
        puts Grabber::CAPABILITIES
        exit
      end

      # The block receives nil for a bare --cache, a directory name for
      # --cache DIR and false for --no-cache.
      #
      o.cache = File.join( Grabber::WORKING_DIR, Programme::CACHE_FILE )
      opt.define( '--[no-]cache [DIR]',
                  'Cache programme data (in DIR, if given).',
                  '(default:',
                  o.cache + ')' ) do |cache|
        if cache.nil?
          o.cache = File.join( Grabber::WORKING_DIR, Programme::CACHE_FILE )
        elsif cache
          o.cache = File.join( File.expand_path( cache ),
                               Programme::CACHE_FILE )
        else
          o.cache = false
        end
      end

      o.cattrans = true
      opt.define( '--[no-]cattrans', 'Perform category translation.',
                  "(default: #{o.cattrans})" ) do |trans|
        o.cattrans = trans
      end

      o.config = File.join( Grabber::WORKING_DIR, 'tv_grab_nl_upc.conf' )
      opt.define( '--config-file FILE',
                  'File containing channel numbers and names.',
                  '(default:',
                  o.config + ')' ) do |file|
        o.config = File.expand_path( file )
      end

      o.configure = false
      opt.define( '--configure', 'Create a configuration file',
                  '(will overwrite existing file).' ) do |config|
        o.configure = config
      end

      o.days = 7
      opt.define( '--days NUMBER',
                  'Fetch data for NUMBER days (1 = today only)',
                  "(default: #{o.days})",
                  Integer ) do |days|
        o.days = days
      end

      o.debug = false
      opt.define( '--debug', 'Print debugging messages. Debugging can',
                  'be dynamically toggled with SIGUSR1.' ) do |debug|
        o.debug = debug
      end

      opt.define( '--description', 'Describe this grabber and exit.' ) do
        puts Grabber::DESC
        exit
      end

      opt.define( '--help', 'Display this usage message and exit.' ) do
        puts opt
        exit
      end

      o.icons = false
      opt.define( '--icons',
                  'Include icon links for the channels.' ) do |icons|
        o.icons = icons
      end

      o.ignore_errors = false
      opt.define( '--ignore-errors',
                  'Ignore errors that occur during programme',
                  "fetching. (default: #{o.ignore_errors})" ) do |ig_err|
        o.ignore_errors = ig_err
      end

      o.ignore_negatives = false
      opt.define( '--ignore-negatives',
                  'Ignore negative entries in IMDB cache.',
                  "(default: #{o.ignore_negatives})" ) do |ig_neg|
        o.ignore_negatives = ig_neg
      end

      opt.define( '--[no-]ignore-old', 'Alias for --actual.' ) do |actual|
        o.actual = actual
      end

      opt.define( '--logos', 'Alias for --icons.' ) { |icons| o.icons = icons }

      o.offset = 0
      opt.define( '--offset DAYS',
                  'Start day from which to start fetching.',
                  "(default: #{o.offset})", Integer ) do |days|
        o.offset = days
      end

      o.output = nil
      opt.define( '--output-file FILE',
                  '(N.B. Filled in by mythfilldatabase.)' ) do |file|
        o.output = file
      end

      # The default must be stored under the same name that the rest of the
      # program reads (prune_subs), otherwise it is nil and the usage message
      # shows an empty default.
      #
      o.prune_subs = false
      opt.define( '--prune-subtitles',
                  'Prune trailing full-stop from subtitles.',
                  "(default: #{o.prune_subs})" ) do |prune|
        o.prune_subs = prune
      end

      o.quiet = false
      opt.define( '--quiet', 'Suppress warnings and errors.' ) do |quiet|
        o.quiet = quiet
      end

      # As with --cache: nil for a bare --ratings, a directory name for
      # --ratings DIR and false for --no-ratings.
      #
      o.ratings = false
      opt.define( '--[no-]ratings [DIR]',
                  'Attempt to dynamically rate films via IMDB',
                  'and cache results (in DIR, if given).',
                  "(default: #{o.ratings})" ) do |ratings|
        if ratings.nil?
          o.ratings = File.join( Grabber::WORKING_DIR, Rating::CACHE_FILE )
        elsif ratings
          o.ratings = File.join( File.expand_path( ratings ),
                                 Rating::CACHE_FILE )
        else
          # --no-ratings must be able to undo an earlier --ratings.
          #
          o.ratings = false
        end
      end

      o.sanity = false
      opt.define( '--sanity-check',
                  'Perform sanity checks before pulling data.' ) do |sanity|
        o.sanity = sanity
      end

      o.sleep = 1.0
      opt.define( '--sleep SECONDS',
                  'Sleep this many seconds between each page',
                  "fetch. (default: #{o.sleep})",
                  Float ) do |secs|
        o.sleep = secs
      end

      o.static_ratings = false
      opt.define( '--static-ratings',
                  'Attempt to statically rate films via IMDB.',
                  "(default: #{o.static_ratings})" ) do |ratings|
        o.static_ratings = ratings
      end

      o.threads = false
      opt.define( '--[no-]threads',
                  'Use threads to fetch guide data in',
                  'parallel. Use with caution.',
                  "(default: #{o.threads})" ) do |threads|
        o.threads = threads
      end

      o.tries = 3
      opt.define( '--tries TRIES',
                  'Number of times to try each page fetch.',
                  "(default: #{o.tries})",
                  Integer ) do |tries|
        o.tries = tries
      end

      o.user_agent = '%s v%s' % [ Grabber::PROGRAM, Grabber::VERSION ]
      opt.define( '--user-agent STRING',
                  'Masquerade as this user agent when talking',
                  'to Web servers.',
                  "(default: #{o.user_agent})" ) do |ua|
        o.user_agent = ua
      end

      o.verbose = false
      opt.define( '--verbose', 'Print informational messages. Verbosity',
                  'can be dynamically toggled with SIGUSR2.' ) do |verbose|
        o.verbose = verbose
      end

      opt.define( '--version', 'Display program version and exit.' ) do
        puts '%s v%s  (C) 2006-2011 Ian Macdonald' %
          [ Grabber::PROGRAM, Grabber::VERSION ]
        exit
      end

      o.xmltvid_suffix = '.chello.nl'
      opt.define( '--xmltvid-suffix STRING',
                  'Suffix to add to channel number to form',
                  "XMLTV ID. (default: #{o.xmltvid_suffix})" ) do |suf|
        o.xmltvid_suffix = suf
      end

    end

    # Only the side effects on o matter; the leftover arguments are not used.
    #
    op.parse!( args )
    o
  end
end


# Work out what the user asked for.
#
options = ProgramOptions.parse( ARGV )

if options.debug
  $stderr.puts 'v%s %s' % [ Grabber::VERSION, Grabber::RELEASE ]
end

# On ^C, say so (unless --quiet was given) and leave with status 255.
#
Signal.trap( 'INT' ) do
  puts 'Interrupted.' unless options.quiet
  exit 255
end

# SIGUSR1 flips the debug flag, so that debugging output can be switched on
# and off while the program is running.
#
Signal.trap( 'USR1' ) do
  options.debug = ! options.debug
end

# SIGUSR2 does the same for the verbose flag.
#
Signal.trap( 'USR2' ) do
  options.verbose = ! options.verbose
end

# Output goes to the file named with --output-file, or else to stdout.
#
output = if options.output
           File.new( options.output, 'w' )
         else
           $stdout
         end

# The pre-execution checks are run only on request.
#
Grabber.pre_checks( options ) if options.sanity

# Make sure the working directory is there.
#
Grabber.create_directory( Grabber::WORKING_DIR, options )

# With --configure, configuring is all we do.
#
if options.configure
  Grabber.configure( options )
  exit
end

# The config file has to exist before we go any further.
#
begin
  channel_file = File.open( options.config )
rescue Errno::ENOENT
  $stderr.puts 'Please create the config file %s.' % [ options.config ]
  exit 1
end

upc_channels, upc_cids = Channel.get_available( options )

# Read the user's channels from the config file; an empty selection is an
# error.
#
channels = Grabber.read_config( channel_file, upc_cids, options.verbose )
if channels.size.zero?
  $stderr.puts 'No channels defined in %s.' % [ options.config ]
  exit 2
end

# The English names of the next seven days of the week, today first.
#
DAYS_OF_WEEK = ( 0...7 ).collect do |ahead|
  ( NOW + ahead * ONE_DAY ).strftime( '%A' )
end

# A change to or from summer time can make a day name appear twice in the
# list, in which case it is not safe to carry on.
#
if DAYS_OF_WEEK.uniq != DAYS_OF_WEEK
  unless options.quiet
    $stderr.puts \
      'Summer/winter time offsets make the program unsafe to run at the moment.',
      'Please try again in an hour.'
  end

  exit 8
end

# Never ask for more days than UPC carries.
#
if options.days > 8
  $stderr.puts \
    'Warning: UPC carries a maximum of 8 days of data only. Adjusting...' \
    if options.verbose

  options.days = 8
end

Programme.load_cache( options ) if options.cache

# Dynamic ratings come with a warning and their own cache.
#
if options.ratings && ! options.static_ratings
  unless options.quiet
    [ 'Warning: Use of --ratings violates IMDB terms of use:',
      '         http://www.imdb.com/help/show_leaf?usedatasoftware',
      '         Consider using --static-ratings instead.' ].each do |line|
      $stderr.puts line
    end
  end

  Rating.load_cache( options )
end

# Static ratings need the ratings list, and switch rating on in general.
#
if options.static_ratings
  Rating.get_ratings_list( options )
  options.ratings = true
end

# When checking sanity verbosely, compare every configured channel with
# UPC's own list.
#
if options.sanity && options.verbose
  channels.each_value { |channel| channel.check( upc_channels ) }
end

# Start the XMLTV output document with its root <tv> element.
#
doc = Document.new( '<!DOCTYPE tv SYSTEM "xmltv.dtd">' )
el_tv = doc.add_element( 'tv' )

# Describe the source of the listings and the program that generated them.
#
[
  [ 'source-info-url', Grabber::GUIDE ],
  [ 'source-info-name', 'UPC TV Guide' ],
  [ 'generator-info-name',
    "#{Grabber::PROGRAM} $Id: tv_grab_nl_upc,v 1.228 2011/04/05 07:33:12 ianmacd Exp $" ],
  [ 'generator-info-url', Grabber::URL ]
].each do |attribute, value|
  el_tv.attributes[attribute] = value
end

# The first guide day to fetch and the time treated as "now"; both are
# adjusted by the sanity check if the guide turns out to be yesterday's.
#
start_day = 0
now = NOW

if options.sanity

  # Check to ensure that today's listing actually does correspond with the day
  # on which we're running the program. Between 00:00 and 01:00, for example,
  # we may be given the listing of the previous day.
  #
  # If the guide proves to be yesterday's and it is still early enough, 'now'
  # is moved back a day and 'start_day' forward a day. In every other
  # unexpected situation, the program exits.

  # The channel is unimportant, so we'll just use Nederland 1 and RTL 4. The
  # sample fetches use a private copy of the options with both the verbose
  # and the quiet flag switched off.
  #
  opts = options.clone
  opts.verbose = false
  opts.quiet = false

  # FIXME: Don't like the hard-coded channel numbers here.
  #
  unless upc_channels.key?( 1 ) && upc_channels.key?( 4 )
    unless options.quiet
      $stderr.puts 'Channel data for sample channel guide page fetch missing. Aborting...'
    end

    exit 5
  end

  # FIXME: Don't like the hard-coded channel numbers here, either.
  #

  # Nederland 1
  #
  guide1 = Channel.new( 1, upc_channels[1], upc_cids[1] ).
           get_tv_guide( 'Today', opts )

  # RTL 4
  #
  guide2 = Channel.new( 4, upc_channels[4], upc_cids[4] ).
           get_tv_guide( 'Today', opts )

  if guide1.empty? && guide2.empty?
    unless options.quiet
      $stderr.puts 'Sample channel guide page fetch failed. Aborting...'
    end

    exit 5
  end

  if ( nr_progs = ( Programme.get_programme_list( guide1 ).size +
                    Programme.get_programme_list( guide2 ).size ) ) < 10
    $stderr.puts <<"EOF"
A sample fetch of today's data for Nederland 1 and RTL 4 returned just #{nr_progs}
programme(s). This almost certainly indicates a problem with the TV guide.
EOF
    exit 3
  end

  # Get the schedule date. The first sample page may be empty (only one of
  # the two fetches has to succeed) or may lack the date header, so a failed
  # match has to be dealt with rather than indexed.
  #
  date_match = guide1.match(
    /<div id="channel[-_]date">\s+<span>(.+dag \d+ \w+)<\/span>/m )

  if date_match.nil?
    unless options.quiet
      $stderr.puts 'Failed to find the schedule date in the sample channel guide page. Aborting...'
    end

    exit 5
  end

  date = date_match[1]

  # Only the (Dutch) name of the weekday is of interest.
  #
  weekday = date.split.first

  WEEKDAYS = %w[ zondag maandag dinsdag woensdag donderdag vrijdag zaterdag ]

  guide_wday = WEEKDAYS.index( weekday )

  if ( today = NOW.wday ) != guide_wday

    # Weekday on which we're being run doesn't match guide day.
    #
    if ( today - 1 ) % 7 == guide_wday

      # It looks as if yesterday's guide is still in place. We're probably
      # being run shortly after midnight.
      #
      $stderr.puts <<"EOF" unless options.quiet
It looks as if yesterday's guide is still in place. You could try running the
program later, at which time the guide will hopefully have been rotated.

EOF
      max_hour = 5

      if now.hour >= max_hour
        $stderr.puts <<"EOF" unless options.quiet
It's now after #{"%02d" % max_hour}:00 and the guide should really have been
rotated for today. Please investigate this matter, as it may not be safe to
continue.
EOF
        exit 4

      else

        $stderr.puts <<"EOF" unless options.quiet
In the meantime, I'll make the necessary chronological adjustments and
continue. The danger here, however, is that the guide may yet be updated
whilst the program is still running. Continue at your own risk.
EOF
      end

      # Make the necessary time adjustments, so that we don't put guide data
      # for e.g. Saturday into the schedule for Sunday.
      #
      # N.B. There's a very bad race condition here, as the guide may still be
      #      rotated during execution, which would result in:
      #
      #      - some channels having no data fetched for the new day (day 0)
      #
      #      - programme data for days 1 and higher on those same channels
      #        would be off by -24 hours, i.e. a programme that should be
      #        inserted on Monday afternoon would instead be inserted on
      #        Sunday afternoon.
      #
      now -= ONE_DAY
      start_day += 1

    else

      # Point the user at the page that the date was read from, i.e. today's
      # guide page for the first sample channel.
      #
      sample_url = Grabber::BASE_URL + '/TV/Guide/Channel/%s/%s/Today/' %
                   [ Grabber.url_encode( upc_channels[1] ), upc_cids[1] ]

      $stderr.puts <<"EOF" unless options.quiet
The guide is not today's, but it's not yesterday's, either. It's for
'#{date}'. I don't know how to deal with this situation.

Please have a look at this page and work out what's going on:

  #{sample_url}
EOF

      exit 4

    end
  end
end

# Work out local midnight at the start of today by subtracting from 'now'
# the whole seconds that have passed since then, allowing for our offset
# from UTC.
#
secs_into_day = ( now.to_i + now.utc_offset ) % ONE_DAY
midnight_today = now - secs_into_day

# Give every configured channel a <channel> element with its XMLTV ID and a
# Dutch display name. No <icon> element is added here; the data fetcher adds
# one when --icons is given.
#
channels.each do |channum, channel|
  el_chan = el_tv.add_element( 'channel' )

  # The XMLTV ID is the channel number followed by the suffix.
  #
  el_chan.attributes['id'] = channum.to_s + options.xmltvid_suffix

  el_display_name = el_chan.add_element( 'display-name' )
  el_display_name.attributes['lang'] = 'nl'
  el_display_name.text = channel.name
end

# One list per requested day, for the channels that had no programme data on
# that day.
#
missing = Array.new( options.days ) { [] }

# Programmes whose category could not be translated, keyed on category.
#
categories = Hash.new { |hash, key| hash[key] = [] }

# Running totals for the final report.
#
programme_total = cached_total = ignored_total = rating_total = 0
fetches = 0

# Fetch, process and emit all programme data for one channel.
#
# The proc is called once per configured channel number, possibly from
# several threads at once. For every requested day it fetches the channel's
# guide page, optionally records the channel's icon, and adds a <programme>
# element to el_tv for every programme on the page that is not skipped.
# Counters are kept per day and per channel, reported when --verbose is in
# effect and finally added to the global totals. Channels without any
# programmes on a given day are recorded in 'missing'.
#
data_fetcher = Proc.new do |channum|
  ch_prog_total = 0
  ch_cached_total = 0
  ch_ignored_total = 0
  ch_rating_total = 0

  if options.verbose
    $stderr.puts 'Fetching data for %s (%d)...' %
      [ channels[channum].name, channum ]
  end

  first_fetch = true

  # Iterate over days, starting at today + any offset, and go the required
  # number of days + any offset.
  #
  ( start_day + options.offset ).
    upto( start_day + options.days - 1 + options.offset ) do |day|

    if options.verbose
      $stderr.puts '  Fetching day %d for %s...' % [ day,
                                                     channels[channum].name ]
    end

    # Figure out which string we need for the day.
    #
    timescope = case day
                when 0 then 'Today'
                when 1 then 'Tomorrow'
                else DAYS_OF_WEEK[day % 7]
                end

    guide = channels[channum].get_tv_guide( timescope, options )

    if guide.empty? && ! options.quiet
      $stderr.puts '  Failed to fetch day %d for %s.' %
        [ day, channels[channum].name ]
    end

    # If this is the first page fetch for this channel and --icons or --logos
    # was given, insert a link to the channel's icon.
    #
    if ! guide.nil? && options.icons && first_fetch
      begin
        el_tv.elements["channel[@id='#{channum}#{options.xmltvid_suffix}']"].
          add_element( 'icon' )
        el_tv.
          elements["channel[@id='#{channum}#{options.xmltvid_suffix}']/icon"].
          attributes['src'] =
            Grabber::BASE_URL + channels[channum].icon_path( guide )

      # A failed match will result in an attempt to call NilClass#[], which
      # doesn't exist.
      #
      rescue NoMethodError
        unless options.quiet
          $stderr.puts '    Failed to determine an icon path for %s.' %
            [ channels[channum].name ]
        end
      end

      first_fetch = false
    end

    # Create an array of all programmes.
    #
    programmes = Programme.get_programme_list( guide )

    if options.verbose
      $stderr.puts '    Found %d programme(s) for day %d on %s.' %
        [ programmes.size, day, channels[channum].name ]
    end

    ch_prog_total += programmes.size
    day_cached_total = 0
    day_ignored_total = 0
    day_rating_total = 0
    fetches += 1

    # Remember this channel if it has no programme data for today.
    #
    if programmes.size == 0

      # Record this as a failure. 'missing' has one slot per requested day,
      # slot 0 being the first day of the loop above, so both the start day
      # and the offset have to be subtracted from the day number.
      #
      missing[day - start_day - options.offset] << channum.to_i
    end

    after_midnight = nil
    done_old_progs = false

    # Calculate midnight on day that has just been fetched.
    #
    midnight_on_day = midnight_today + day * ONE_DAY

    # Iterate over all programmes.
    #
    programmes.each do |programme|

      # Skip this programme if it began earlier than the current time.
      # Actually, we test only whether its start hour is earlier than the
      # current hour. Once a programme has been kept, all those after it are
      # kept, too.
      #
      if timescope == 'Today' && options.actual
        if programme.time.to_i < NOW.hour && ! done_old_progs
          if options.debug
            $stderr.puts '    Ignoring old programme broadcast at %s...' %
              [ programme.time.sub( /\s.+$/, '' ) ]
          end

          day_ignored_total += 1
          next
        else
          done_old_progs = true
        end
      end

      if options.cache && programme.cached?
        day_cached_total += 1

      else

        # Get detailed programme information and increment the guide page
        # counter.
        #
        fetch = programme.get_detail( timescope, options )
        next if fetch.zero?

        fetches += fetch

        # Get the start and stop times.
        #
        start, stop = programme.time.scan( /\d+:\d+/ )
        start_hours, start_mins = start.split( /:/ ).collect { |x| x.to_i }
        stop_hours, stop_mins = stop.split( /:/ ).collect { |x| x.to_i }

        # Remove junk from fields.
        #
        programme.derive_fields( options )

        # Each page contains programmes that start in the early hours of the
        # morning. However, those at the top of the page start after midnight
        # today, whereas those at the bottom start after midnight tonight and
        # therefore technically belong to tomorrow's programmming.
        #
        # after_midnight starts out as nil, becomes false once a programme
        # starting at 10:00 or later has been seen, and true at the first
        # early-morning programme after that.
        #
        after_midnight ||= false if start_hours > 9

        if start_hours < 10 && after_midnight == false
          after_midnight = true
        end

        # Calculate programme start and stop times in UNIX time.
        #
        programme.start_time =
          midnight_on_day + start_hours * 3600 + start_mins * 60
        programme.stop_time =
          midnight_on_day + stop_hours * 3600 + stop_mins * 60

        # Add a day if this programme starts and ends after midnight.
        #
        if after_midnight
          programme.start_time += ONE_DAY
          programme.stop_time += ONE_DAY
        end

        # If the hour of the stop time is less than the hour of the start time,
        # the programme finishes after midnight and needs a day added to the
        # stop time. The same is true if the hour of the stop time is the same
        # as that of the start time, but the minute of the stop time is less
        # than or equal to that of the start time.
        #
        if start_hours > stop_hours ||
          ( start_hours == stop_hours && start_mins >= stop_mins )
          programme.stop_time += ONE_DAY
        end

      end

      # Issue a warning if the program starts and ends across a summer/winter
      # time change.
      #
      if programme.start_time.utc_offset != programme.stop_time.utc_offset &&
         ! options.quiet
        from, to = ( programme.start_time.utc_offset -
                     programme.stop_time.utc_offset ) > 0 ?
                   %w[ summer winter ] : %w[ winter summer ]
        $stderr.puts "    Warning: '%s' starts at %s in %s time, but ends\n" \
                     '             at %s in %s time. ' \
                     'There may be an error of +/- 1 hour.' %
                     [ programme.title,
                       programme.start_time.strftime( '%H:%M' ),
                       from,
                       programme.stop_time.strftime( '%H:%M' ),
                       to ]
      end

      # Prepare XMLTV ID of the form channum + suffix.
      #
      el_prog = el_tv.add_element( 'programme' )
      el_prog.attributes['channel'] = channum.to_s + options.xmltvid_suffix

      # Format start and stop time.
      #
      el_prog.attributes['start'] =
        programme.start_time.strftime( '%Y%m%d%H%M%S %z' )
      el_prog.attributes['stop'] =
        programme.stop_time.strftime( '%Y%m%d%H%M%S %z' )

      # Add title.
      #
      el_title = el_prog.add_element( 'title' )
      el_title.attributes['lang'] = 'nl'

      # Don't convert apostrophes etc. in the title to entities on output.
      #
      el_title.text = Text.new( programme.title, false, nil, true )

      # Add subtitle, if we've derived one.
      #
      if programme.subtitle
        el_subtitle = el_prog.add_element( 'sub-title' )
        el_subtitle.attributes['lang'] = 'nl'

        # Don't convert apostrophes etc. in the subtitle to entities on output.
        #
        el_subtitle.text = Text.new( programme.subtitle, false, nil, true )
      end

      # Add episode number, if we've found one. It goes into the middle field
      # of the xmltv_ns string, reduced by one.
      #
      if programme.episode
        el_episode = el_prog.add_element( 'episode-num' )
        el_episode.attributes['system'] = 'xmltv_ns'
        el_episode.text = '.%d.' % [ programme.episode - 1 ]
      end

      # Add description, if there is one.
      #
      unless programme.desc.empty?
        el_desc = el_prog.add_element( 'desc' )
        el_desc.attributes['lang'] = 'nl'

        # Don't convert apostrophes etc. in the desc. to entities on output.
        #
        el_desc.text = Text.new( programme.desc, false, nil, true )
      end

      # Deal with credits, if there are any.
      #
      unless programme.directors.empty? &&
             programme.actors.empty? &&
             programme.presenters.empty?
        el_credits = el_prog.add_element( 'credits' )

        # Presenters.
        #
        programme.presenters.each do |presenter|
          el_presenter = el_credits.add_element( 'presenter' )

          # Don't convert apostrophes etc. in the presenter's name to entities
          # on output.
          #
          el_presenter.text = Text.new( presenter, false, nil, true )
        end

        # Directors.
        #
        programme.directors.each do |director|
          el_director = el_credits.add_element( 'director' )

          # Don't convert apostrophes etc. in the director's name to entities
          # on output.
          #
          el_director.text = Text.new( director, false, nil, true )
        end

        # Actors.
        #
        programme.actors.each do |actor|
          el_actor = el_credits.add_element( 'actor' )

          # Don't convert apostrophes etc. in the actor's name to entities on
          # output.
          #
          el_actor.text = Text.new( actor, false, nil, true )
        end
      end

      if options.cattrans

        # Add translated category, if we recognise it. The same lower-case
        # key is used for the test and for the look-up, so that a category
        # that is recognised can never yield an empty translation.
        #
        category_key = programme.category.downcase

        if Programme::CATEGORIES.key?( category_key )
          el_category = el_prog.add_element( 'category' )
          el_category.attributes['lang'] = 'en'
          el_category.text = Programme::CATEGORIES[category_key]
        else
          categories[programme.category] <<
            [ channum, el_prog.attributes['start'],
              el_prog.attributes['stop'], programme.title ]
        end

      else
        # Add untranslated category.
        #
        el_category = el_prog.add_element( 'category' )
        el_category.attributes['lang'] = 'nl'
        el_category.text = programme.category
      end

      if options.ratings

        # UPC give us almost no way to determine that a programme is a movie.
        # We know that programmes with a genre of 'speelfilm' are films, but
        # we'll also assume that programmes on channels whose name starts with
        # the word 'Film' are films. This catches, for example, the Film1 set
        # of channels: http://www.film1.nl/
        #
        # We further assume that any programme that lasts between 80 minutes
        # and 4 hours, and is of a credible genre, is also a film. We may not
        # always be right, but even when we're wrong, the negative effect is
        # negligible, as we'll mostly just fail to find a rating for the title.
        #
        duration = ( programme.stop_time - programme.start_time ) / 60

        if programme.category =~ /^[Ss]peelfilm$/ ||
           channels[channum].name =~ /^Film/ ||
           ( duration >= 80 && duration <= 240 &&
             programme.category =~ /^(?:actie|avontuur|detective|drama|komedie|
                                    romantiek|sci-fi|tekenfilms|thriller)$/ix )
          if star_rating = Rating.imdb_rating( programme.title, options )
            el_star_rating = el_prog.add_element( 'star-rating' )
            el_star_value = el_star_rating.add_element( 'value' )
            el_star_value.text = star_rating
            day_rating_total += 1
          end
        end
      end

      # Deal with censor board's (Kijkwijzer) rating.
      #
      if programme.rating
        el_rating = el_prog.add_element( 'rating' )
        el_rating.attributes['system'] = 'Kijkwijzer'
        el_value = el_rating.add_element( 'value' )
        el_value.text = programme.rating
      end

      programme.cache if options.cache && ! programme.cached?
    end

    ch_cached_total += day_cached_total
    ch_ignored_total += day_ignored_total
    ch_rating_total += day_rating_total

    if options.verbose
      if options.cache && day_cached_total > 0
        $stderr.puts '    Found %d programme(s) in cache.' %
                     [ day_cached_total ]
      end

      if options.actual && day_ignored_total > 0
        $stderr.puts '    Ignored %d old programme(s).' % [ day_ignored_total ]
      end

      if options.ratings && day_rating_total > 0
        $stderr.puts '    Rated %d film(s) via IMDB.' % [ day_rating_total ]
      end
    end

  end

  if options.verbose
    $stderr.puts '  Found a total of %d programme(s) for %s (%d).' %
      [ ch_prog_total, channels[channum].name, channum ]

    if options.cache
      $stderr.puts '  Found %d programme(s) in cache.' % [ ch_cached_total ]
    end

    if options.actual
      $stderr.puts '  Ignored %d old programme(s).' % [ ch_ignored_total ]
    end

    if options.ratings
      $stderr.puts '  Rated %d film(s) via IMDB.' % [ ch_rating_total ]
    end

    $stderr.puts
  end

  # Fold this channel's counters into the global totals.
  #
  programme_total += ch_prog_total
  cached_total += ch_cached_total
  ignored_total += ch_ignored_total
  rating_total += ch_rating_total
end

# With --threads, the fetcher threads are collected here.
#
threads = [] if options.threads

if options.threads && options.static_ratings
  # We need a mutex to protect the ratings hash from access while we're
  # building it. It replaces the plain flag in options.threads.
  #
  require 'thread'
  options.threads = Mutex.new
end

# Process the channels in key order, either each in a thread of its own or
# one after the other.
#
ordered = channels.keys.sort

if options.threads
  ordered.each do |ch|
    threads << Thread.new( ch ) { |chan| data_fetcher.call( chan ) }
  end

  threads.each { |worker| worker.join }
else
  ordered.each { |ch| data_fetcher.call( ch ) }
end

# Write out UTF-8 XML.
#
doc.write( output, -1 )

# Save whichever caches are in use.
#
Programme.dump_cache( options ) if options.cache
Rating.dump_cache( options ) if options.ratings && ! options.static_ratings

# Report on data processed: programme, page, cache and rating totals,
# followed by the running time and the average throughput.
#
if options.verbose
  elapsed = Time.now - NOW

  # Whole-number percentage of part in whole; 0 when there is nothing to
  # take a percentage of.
  #
  percent = lambda do |part, whole|
    whole.zero? ? 0 : ( ( part.to_f / whole ) * 100 ).round
  end

  $stderr.puts '%d programme(s) found on %d channel(s).' %
    [ programme_total, channels.size ]
  $stderr.puts '%d guide page(s) fetched in total.' % [ fetches ]

  if options.cache
    $stderr.puts '%d programme(s) (%d%%) found in cache and %d (%d%%) added to cache.' %
      [ cached_total,
        percent.call( cached_total, programme_total ),
        Programme.cached,
        percent.call( Programme.cached, programme_total ) ]
  end

  if options.actual
    $stderr.puts '%d old programme(s) ignored.' % [ ignored_total ]
  end

  if options.ratings
    attempted = rating_total + Rating.not_rated + Rating.cache_negatives
    failed = attempted - rating_total
    $stderr.puts '%d film rating(s) attempted via IMDB.' % [ attempted ]

    mode = options.static_ratings ? 'static' : 'dynamic'

    # The cached share is reported as whatever is left of 100%.
    #
    r = percent.call( Rating.rated, rating_total )
    $stderr.puts '%d (%d%%) film(s) rated, of which %d (%d%%) %s and %d (%d%%) cached.' %
      [ rating_total,
        percent.call( rating_total, attempted ),
        Rating.rated,
        r,
        mode,
        Rating.cache_positives,
        100 - r ]

    nr = percent.call( Rating.not_rated, failed )
    $stderr.puts '%d (%d%%) film(s) not rated, of which %d (%d%%) %s and %d (%d%%) cached.' %
      [ failed,
        percent.call( failed, attempted ),
        Rating.not_rated,
        nr,
        mode,
        Rating.cache_negatives,
        100 - nr ]
  end

  elapsed = elapsed.to_i

  # A run that finishes within a second is counted as one second for the
  # throughput figures, so that we never divide by zero.
  #
  rate_secs = elapsed.zero? ? 1 : elapsed

  $stderr.print %Q(The total running time was %02d' %02d" ) %
    [ elapsed / 60, elapsed % 60 ]
  $stderr.puts '(%d page(s) / %d programme(s) per second).' %
    [ fetches / rate_secs, programme_total / rate_secs ]
end

# With --verbose, report the channels for which no programme data was found.
#
if options.verbose && ! missing.flatten.empty?
  $stderr.puts 'Missing programme data report:'

  # First, day by day: the channels without data on that day. The first slot
  # of 'missing' is labelled with the first day that was fetched.
  #
  first_day = start_day + options.offset

  missing.each_with_index do |absent, slot|
    absent.sort!
    next if absent.empty?

    $stderr.puts "\nChannels on day %d:" % [ first_day + slot ]

    names = absent.collect { |num| channels[num].name }
    absent.each_with_index do |num, i|
      $stderr.puts '%5d %s' % [ num, names[i] ]
    end
  end
  $stderr.puts

  # Then, the channels that are listed as missing on each and every day.
  #
  missing.flatten.uniq.each do |channum|
    next unless missing.all? { |absent| absent.include?( channum ) }

    $stderr.puts 'Channel %3d (%s) has no data for any day!' %
      [ channum, channels[channum].name ]
  end
  $stderr.puts

end

# With --debug, list every category that could not be translated, together
# with the programmes (channel, start, stop and title) that carried it.
#
if options.debug && ! categories.empty?
  $stderr.puts \
    "The following unrecognised programme categories were found:\n\n"

  categories.each_pair do |category, showings|
    $stderr.puts '%s:' % [ category ]

    showings.each do |showing|
      $stderr.puts '%3d %s - %s : %s' % showing
    end

    $stderr.puts
  end
end