#!/usr/bin/env python

"""Mine Debian release-critical bug data and list interesting items

John Belmonte <john@neggie.net>
"""

import calendar
import cgi
import os
import re
import time
import urllib


# Index page of release-critical bugs; it is fetched once and scanned for
# <A NAME="nnn"> anchors to obtain the bug numbers.
RC_LIST_URL = 'http://bugs.debian.org/release-critical/debian/main.html'
# Per-bug page linked from the report; formatted with an integer bug number.
BUG_URL = 'http://bugs.debian.org/%d'
# Full mbox of a bug report; formatted with the bug number as scraped (a
# string), hence %s rather than %d.
BUG_MBOX_URL = 'http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s&mbox=yes'
# strptime() format applied to a message's Date header after the weekday
# prefix, parenthesised comment, " GMT" and numeric offset have been removed.
DATE_PATTERN = '%d %b %Y %H:%M:%S'
# Report file written to the current directory; an existing copy is renamed
# to OUTPUT_NAME + '.bak' first.
OUTPUT_NAME = 'neglected-bugs.html'
# Page skeleton, filled in with %-style mapping keys: min_days,
# current_date_string, item_count and output_list.
HTML_TEMPLATE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
            "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
    <title>Debian Neglected RC Bugs</title>
</head>
<body>
<p>
This is a list of Debian bugs having all of the following attributes:
</p>
<ul>
<li>in the main archive
<li>release-critical
<li>no follow-ups
<li>older than %(min_days)d days
</ul>
<p>
Generated on %(current_date_string)s by
<tt><a href="mine-bugs">mine-bugs</a></tt>.
</p>
<p>
%(item_count)d items, sorted by descending bug number.
</p>
<pre>
Bug No.  Days  Package           Bug Title
-------  ----  ----------------  ---------
%(output_list)s
</pre>
</body>
</html>
"""

# Download the release-critical index page.  For offline testing, read a
# saved copy instead:  bug_list_html = open('main.html').read()
rc_list_page = urllib.urlopen(RC_LIST_URL)
try:
    bug_list_html = rc_list_page.read()
finally:
    # close explicitly rather than relying on garbage collection
    rc_list_page.close()

# Every bug on the index page is introduced by an <A NAME="nnn"> anchor.
bug_re = re.compile(r'<A NAME="([0-9]+)">', re.IGNORECASE)
bug_numbers = bug_re.findall(bug_list_html)

# Take a single clock reading so that the timestamp shown in the report and
# the bug ages computed from current_time refer to the same instant.
current_time = time.time()
current_date_string = time.strftime('%a %d %b %H:%M:%S UTC %Y',
    time.gmtime(current_time))
# minimum age, in days, for a bug to be listed
min_days = 14
bug_records = []

# Patterns applied to each bug's mbox text.
# package_re: the "Package:" line of the report.
package_re = re.compile(r'^Package:\s*(.*?)$', re.MULTILINE | re.IGNORECASE)
# subject_re: the first "Subject:" header.
subject_re = re.compile(r'^Subject: (.*?)$', re.MULTILINE)
# from_re: for each "From " separator line, the text of the next "Date:"
# header that follows it (presumably one match per message in the mbox).
from_re = re.compile(r'^From .*?^Date: (.*?)$', re.MULTILINE | re.DOTALL)

class BugRecord(object):
    """Plain attribute holder for the facts gathered about one bug."""


def _utc_offset_seconds(offset):
    """Return the number of seconds east of UTC for a '+HHMM'/'-HHMM' string.

    Anything that is not a sign followed by exactly four digits is treated
    as UTC (0), so a malformed zone never aborts the run.
    """
    digits = offset[1:]
    if len(digits) != 4:
        return 0
    seconds = int(digits[:2]) * 3600 + int(digits[2:]) * 60
    if offset[0] == '-':
        return -seconds
    return seconds


# Download each bug's mbox and record its package, title, follow-up count
# and the age (in days) of its most recent message.
for bug_number in bug_numbers:
    url = BUG_MBOX_URL % bug_number
    mbox_page = urllib.urlopen(url)
    try:
        bug_mbox = mbox_page.read()
    finally:
        # close explicitly rather than relying on garbage collection
        mbox_page.close()

    package_match = package_re.search(bug_mbox)
    if package_match is None:
        # no "Package:" line found; nothing useful to list for this bug
        continue
    package = package_match.group(1)

    subject_match = subject_re.search(bug_mbox)
    if subject_match is None:
        subject = '(no subject)'
    else:
        subject = subject_match.group(1)

    from_list = from_re.findall(bug_mbox)
    if not from_list:
        # no dated message at all (e.g. an empty or error response); skip
        # the bug instead of failing on from_list[-1]
        continue
    # the first match is taken to be the original report, the rest follow-ups
    num_followups = len(from_list) - 1
    last_time_string = from_list[-1]

    last_time_offset = '+0000'
    # remove extraneous stuff from date string: a trailing "(comment)",
    # the leading weekday name, and a literal " GMT"
    last_time_string = re.sub(r' \(.*?\)', '', last_time_string)
    last_time_string = re.sub(r'^\w+?, ', '', last_time_string)
    last_time_string = re.sub(r' GMT', '', last_time_string)
    # separate offset, handle missing offset
    match = re.search(r'^(.*) ([+-]\d+)$', last_time_string)
    if match:
        last_time_string, last_time_offset = match.groups()
    try:
        # The header gives the sender's wall-clock time.  Read it as if it
        # were UTC, then subtract the sender's offset to get the real epoch
        # time; time.mktime() would wrongly apply this machine's time zone.
        last_time = calendar.timegm(
            time.strptime(last_time_string, DATE_PATTERN))
    except (ValueError, OverflowError):
        # unparseable date; skip this bug
        continue
    last_time -= _utc_offset_seconds(last_time_offset)

    # clamp so that a date in the future does not give a negative age
    elapsed_seconds = max(current_time - last_time, 0)
    elapsed_days = elapsed_seconds / (60 * 60 * 24)

    record = BugRecord()
    record.bug_number = int(bug_number)
    record.elapsed_days = elapsed_days
    record.package = package
    record.subject = subject
    record.num_followups = num_followups
    bug_records.append(record)

def is_neglected(record):
    """tell whether a bug has no follow-ups and is at least min_days old"""
    if record.num_followups > 0:
        # somebody has replied, so the bug is not neglected
        return False
    return record.elapsed_days >= min_days

def sort_by_bug_num(a, b):
    """comparison function ordering records by descending bug number

    Returns 1 when b's bug number is the larger one, -1 when a's is, and 0
    when they are equal, so that sorting puts the highest number first.
    """
    # Written out instead of calling the cmp() builtin, which no longer
    # exists in Python 3; the result is the same as cmp(b, a).
    return (b.bug_number > a.bug_number) - (b.bug_number < a.bug_number)

def shrink(s, num_characters):
    """return s as-is if it fits in num_characters, else cut it and end
    it with "..." (the result is num_characters long, provided
    num_characters is at least 3)"""
    ellipsis = '...'
    if len(s) <= num_characters:
        return s
    # leave room for the ellipsis within the requested width
    keep = num_characters - len(ellipsis)
    return s[:keep] + ellipsis

def record_to_html(record):
    """return one line of the report's <pre> listing for record

    The line holds the bug number (linked to the bug's page), the age in
    whole days, the package name in a 16-character column and the bug
    title.  Package and title come from downloaded mail text, so both are
    HTML-escaped.
    """
    # remove package name prefix from bug title, since it's redundant
    subject = re.sub(r'^%s[:_]?\s*' % re.escape(record.package), '',
        record.subject)
    # Pad to the column width before escaping, so that an entity such as
    # "&amp;" counts as the single character the browser will display.
    package = cgi.escape('%-16s' % shrink(record.package, 16))
    return '<a href="%s">%7d</a>  %4d  %s  %s' % (
        BUG_URL % record.bug_number, record.bug_number,
        int(record.elapsed_days), package,
        cgi.escape(subject))

# Select the neglected bugs, order them by descending bug number and render
# one listing line per bug.
output_records = filter(is_neglected, bug_records)
output_records.sort(sort_by_bug_num)
item_count = len(output_records)
output_list = '\n'.join(map(record_to_html, output_records))
# Pass the template exactly the values it names, rather than every module
# global via vars().
output_html = HTML_TEMPLATE % {
    'min_days': min_days,
    'current_date_string': current_date_string,
    'item_count': item_count,
    'output_list': output_list,
}

# Keep the previous report as a .bak file, then write the new one.
if os.path.exists(OUTPUT_NAME):
    os.rename(OUTPUT_NAME, OUTPUT_NAME + '.bak')
output_file = open(OUTPUT_NAME, 'w')
try:
    output_file.write(output_html)
finally:
    # make sure the report is flushed and the handle released
    output_file.close()
