#!/usr/bin/python
# -*- coding: utf-8 -*-

# This script is too small to constitute a copyrightable work in Finland,
# so you may use it as you wish. If you live in a jurisdiction where it would
# be copyrightable, I hereby give you the right to use it as you wish.

import re
import urllib2

basepage = urllib2.urlopen('http://www.hsl.fi/FI/aikataulutjareitit/raitiolinjat/Sivut/default.aspx')
btext = basepage.read()
urls = re.findall('http://aikataulut.hsl.fi.*html', btext)

for url in urls:
    page = urllib2.urlopen(url)
    text = page.read()
    text = text[:text.find('<br />Lauantaisin')] #viikonloput veke
    matches = re.findall('&nbsp;&nbsp;[0-2][0-9]\.[0-9]{2}', text)
    print "sivulla ", url, "on lähtöjä", len(matches)   
    if url.find('_1a') > 0:
        matches = re.findall('&nbsp;&nbsp;[0-2][0-9]\.[0-9]{2}A', text)
        print "joista 1A", len(matches)
    if url.find('_4t') > 0:
        matches = re.findall('&nbsp;&nbsp;[0-2][0-9]\.[0-9]{2}T', text)
        print "joista 4T", len(matches)
 


