#!/usr/bin/python
# -*- coding:Utf-8 -*-

###################################################################################
# mop2table.py - CGI Application to show the web page                             #
# http://filyb.info/dotclear/index.php/2006/05/21/286-la-marche-au-pas            #
# as a table                                                                      #
#                                                                                 #
# Version 0.1.1                                                                   #
# Copyright Florian Birée, 2006                                                   #
# http://filyb.info/                                                              #
#                                                                                 #
# Changelog:                                                                      #
#                                                                                 #
# # version 0.1.1                                                                 #
#                                                                                 #
#   o bug des liens d'en tête non fermés.                                         #
#                                                                                 #
# This program is free software; you can redistribute it and/or                   #
# modify it under the terms of the GNU General Public License                     #
# as published by the Free Software Foundation; either version 2                  #
# of the License, or (at your option) any later version.                          #
#                                                                                 #
# This program is distributed in the hope that it will be useful,                 #
# but WITHOUT ANY WARRANTY; without even the implied warranty of                  #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                   #
# GNU General Public License for more details.                                    #
#                                                                                 #
# You should have received a copy of the GNU General Public License               #
# along with this program; if not, write to the Free Software                     #
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. #
# http://www.gnu.org/copyleft/gpl.html                                            #
###################################################################################

__author__ = "Florian Birée"
__version__ = "0.1.1"
__copyright__ = "Florian Birée, 2006, under GNU/GPL (http://www.gnu.org/copyleft/gpl.html)"

###Modules import
import os
import sys
import urllib
import cgitb; cgitb.enable()

# Discussion page that is fetched and summarised as a table.
url = "http://filyb.info/dotclear/index.php/2006/05/21/286-la-marche-au-pas"

# CGI response header; the blank line that ends the header section comes from
# the newline embedded here plus the one appended by ``print``.
html_headers = "Content-Type: text/html\n"

# Static top of the generated page, with the script version interpolated.
# The literal starts directly with the XML declaration: XML only allows the
# declaration at the very beginning of the document, so a leading newline
# here would make the output fail XHTML validation.
page_head = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="fr" lang="fr">
<head>
    <meta http-equiv="Content-Type"	content="text/html; charset=UTF-8" />
    <title>La marche au pas - tableau de chasse</title>
    <style type="text/css">
    table {
        border-style:solid;
        border-width:1px;
        border-collapse:collapse;
    }
    td, th {
        border-style:solid;
        border-width:1px;
    }
    tr.thesa {
        background:yellow;
    }
    tr.quinze {
        background:red;
    }
    </style>
</head>
<body id="mop2table">
    <h1>Marche ! ô <acronym title="Programme Automatique de Synthèse">PAS</acronym></h1>
    <p>Avis&nbsp;: ceci est une version synthétique de la longue discussion publiée sous le titre <a href="http://filyb.info/dotclear/index.php/2006/05/21/286-la-marche-au-pas" hreflang="fr" title="Page actuelle de la discussion"><q>La marche au pas</q></a>. Cette version, sous la forme d'un tableau, est générée automatiquement depuis la page sus-liée.</p>
    <p>Ce <acronym title="Programme Automatique de Synthèse">PAS</acronym> a franchi la version %s, d'après son auteur Florian Birée alias Thesa.</p>
""" % __version__

# Static bottom of the generated page, with the script version interpolated.
page_footer = """
    <p>mop2table.py - %s</p>
    <p>Ce programme essaye de générer du code <a href="http://validator.w3.org/check?uri=referer" hreflang="en" title="Vérifier que cette page est valide en XHTML 1.0 strict - Anglais">valide <acronym title="Extended hypertext markup language">XHTML</acronym> 1.0</a> (mais c'est pas gagné !).</p>
</body>
</html>
""" % __version__

# Hand-maintained exceptions to the automatic splitting, keyed by the 1-based
# position of a post on the page. Each value lists substrings; the main
# routine glues any fragment containing one of them onto the previous
# fragment of the same post instead of giving it a table cell of its own.
ex = {
    8: ['a. à', 'b. n', 'c. c', 'd. r'],
    9: [
        '« Tout',
        '(Note',
        'Pour moi, je trouve au contraire',
        "Et c'est par souci",
        'Tout ça pour',
    ],
    10: ["Je pense qu'elles"],
    11: ['<pre>'],
    12: ['/:', '\\.', 'ksuhzuihgvz'],
    13: ['.|', ':|'],
    14: ['|:', 'Racatacata', 'dhvkhjgslkuyvk'],
    17: ['Quant au fait que'],
    19: ['Si je me prive de polémiquer'],
    24: ['y. Attention', 'x. E.T. a débarqué'],
}


###Main
def extract_posts(html):
    """Split the fetched page into posts, each a list of text fragments.

    A post is whatever lies between a ``<blockquote class="bc">`` opener and
    the next ``</blockquote>``. Anything before the first opener is dropped.
    Posts containing ``<li>`` are split on list items; every other post is
    split on ``<br />`` after its paragraph tags are removed.

    Returns a list of lists of strings, one inner list per post.
    """
    posts = []
    for chunk in html.split('<blockquote class="bc">')[1:]:
        body = chunk.split('</blockquote>')[0]
        if '<li>' in body:
            # List-style post: strip the list scaffolding, then cut on <li>.
            for tag in ('<ul>', '</ul>', '</li>'):
                body = body.replace(tag, '')
            # The piece before the first <li> is not an item.
            fragments = body.split('<li>')[1:]
        else:
            # Paragraph-style post: removing '\n<br />' first collapses a
            # line break that directly follows a newline.
            for tag in ('<p>', '</p>', '\n<br />'):
                body = body.replace(tag, '')
            fragments = body.split('<br />')
        posts.append(fragments)
    return posts


def merge_continuations(threads, markers):
    """Glue fragments that contain a marker onto the preceding fragment.

    threads -- fragments of one post, in order.
    markers -- substrings identifying a fragment as a continuation.

    A continuation is joined to the previous fragment with ``<br />``. If
    the very first fragment matches a marker there is nothing to join it to,
    so it is kept as its own fragment (indexing the empty result list would
    raise IndexError).

    Returns a new list; the input list is not modified.
    """
    merged = []
    for thread in threads:
        hits = [marker for marker in markers if marker in thread]
        if hits and merged:
            merged[-1] += '<br />' + thread
        else:
            merged.append(thread)
    return merged


def build_table(posts, page_url, exceptions):
    """Render the posts as an XHTML table and return it as a string.

    posts      -- list of fragment lists, as returned by extract_posts().
    page_url   -- URL of the source page; each row links to
                  ``page_url#hc<N>`` where N is the 1-based post number.
    exceptions -- mapping of post number to continuation markers, applied
                  through merge_continuations().

    Odd-numbered posts are attributed to 'Thesa', even-numbered ones to
    '15.96'. A row repeating the column numbers follows every even post.
    """
    # NOTE(review): the column count is taken before continuation merging,
    # as the original code did, so the header can be wider than every row.
    column_count = max([len(threads) for threads in posts] or [0])
    numbers = [str(index + 1) for index in range(column_count)]

    parts = ["""
    <table>
        <thead><tr><th>Auteurs</th>"""]
    parts.extend('<th>' + number + '</th>' for number in numbers)
    parts.append('</tr></thead>\n<tbody>\n')

    numbers_row = ('<tr><td>Auteurs</td>'
                   + ''.join('<td>' + number + '</td>' for number in numbers)
                   + '</tr>')

    for index, threads in enumerate(posts):
        pid = index + 1
        if pid in exceptions:
            threads = merge_continuations(threads, exceptions[pid])

        if pid % 2:
            author = 'Thesa'
            css_class = 'thesa'
        else:
            author = '15.96'
            css_class = 'quinze'

        parts.append('            <tr class="' + css_class + '">')
        parts.append('<td><a href="' + page_url + '#hc' + str(pid)
                     + '" hreflang="fr" title="Aller à cette réponse">#'
                     + str(pid) + ' - ' + author + '</a></td>')
        # Fragments are inserted as-is: they are HTML taken from the page.
        parts.extend('<td>' + thread + '</td>' for thread in threads)
        parts.append('</tr>\n')

        if pid % 2 == 0:
            # Repeat the column numbers after each pair of posts.
            parts.append(numbers_row)

    parts.append('        </tbody>')
    parts.append('    </table>')
    return ''.join(parts)


if __name__ == "__main__":
    # Fetch the page source; close the connection even if reading fails.
    sock = urllib.urlopen(url)
    try:
        html = sock.read()
    finally:
        sock.close()

    # Assemble the page around the generated table.
    page = page_head + build_table(extract_posts(html), url, ex) + page_footer

    # Emit the CGI response: header, blank line, then the document.
    print(html_headers)
    print(page)
