# -*- coding: utf-8 -*-
#   WillowNG - Content Filtering Web Proxy
#   Copyright (C) 2006  Travis Watkins
#
#   This library is free software; you can redistribute it and/or
#   modify it under the terms of the GNU Library General Public
#   License as published by the Free Software Foundation; either
#   version 2 of the License, or (at your option) any later version.
#
#   This library is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   Library General Public License for more details.
#
#   You should have received a copy of the GNU Library General Public
#   License along with this library; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from SocketServer import ForkingMixIn
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
from urlparse import urlparse, urlunparse
import select
import socket
import gobject

#response content types that ProxyHandler._read_write buffers for the
#content filter; any other detected type is relayed to the client unfiltered
filter_types = ('text/html', 'text/plain', 'application/xml')

class ProxyHandler (BaseHTTPRequestHandler):
	"""Per-request handler that relays proxied HTTP and CONNECT traffic.

	The handler reads three collaborators from self.server: ``domain``
	(queried with isBad/isGood), ``bayes`` (queried with isBad on buffered
	response text) and ``config`` (maps 'domain_blocked_page' and
	'content_blocked_page' to the paths of the block-page files).
	"""

	#switched off for a request once its host is found to be whitelisted
	do_bayesian_check = True

	def _split_host_port(self, host_info):
		"""Return (host, port) parsed from 'host' or 'host:port'.

		The port defaults to 80. ValueError is raised when the text after
		the last colon is not an integer.
		"""
		if ':' in host_info:
			#split on the last colon only so extra colons cannot break unpacking
			host, port = host_info.rsplit(':', 1)
		else:
			host, port = host_info, 80
		return host, int(port)

	def _sniff_content_type(self, header_block):
		"""Return the lower-cased media type named by a raw header block.

		header_block is the response text preceding the blank line. Any
		parameters after ';' (such as charset) are dropped. An empty string
		is returned when there is no Content-Type header.
		"""
		for line in header_block.split('\r\n'):
			if ':' not in line:
				continue
			name, value = line.split(':', 1)
			if name.strip().lower() == 'content-type':
				return value.split(';')[0].strip().lower()
		return ''

	def _send_block_page(self, config_key, replacements=()):
		"""Answer the client with a 403 built from a configured page file.

		config_key selects the file path in self.server.config and
		replacements is a sequence of (placeholder, value) pairs substituted
		into the page text.
		"""
		page_file = open(self.server.config[config_key])
		try:
			page = page_file.read()
		finally:
			page_file.close()
		for placeholder, value in replacements:
			page = page.replace(placeholder, value)
		saved_format = self.error_message_format
		#NOTE(review): the page text is handed to send_error as its message
		#with the stock body template blanked, exactly as before - confirm
		#the page files are written with that in mind
		self.error_message_format = ''
		try:
			self.send_error(403, page)
		finally:
			self.error_message_format = saved_format

	def _connect_to(self, host_info, out_socket):
		"""Connect out_socket to the host named by host_info.

		Returns True once connected. Returns False after an error response
		has been sent to the client: 400 for an unparsable port, 403 for a
		blocked domain, 404 when the connection attempt fails.
		"""
		try:
			host, port = self._split_host_port(host_info)
		except ValueError:
			self.send_error(400, 'bad host %s' % host_info)
			return False
		#trace of the requested host on stdout (prints the same on Python 2)
		print(host)
		if self.server.domain.isBad(host):
			self._send_block_page('domain_blocked_page', (('%domain%', host),))
			return False
		if self.server.domain.isGood(host):
			#don't want to run the content filter on whitelisted domains
			self.do_bayesian_check = False
		try:
			out_socket.connect((host, port))
		except socket.error as err:
			#prefer the textual part of an (errno, text) error
			try:
				msg = err.args[1]
			except IndexError:
				msg = str(err)
			self.send_error(404, msg)
			return False
		return True

	def do_CONNECT(self):
		"""Open a tunnel to the host in self.path and relay bytes both ways."""
		out_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		try:
			if self._connect_to(self.path, out_socket):
				self.wfile.write(self.protocol_version + ' 200 Connection established\r\n')
				self.wfile.write("\r\n")
				#tunnelled data is relayed untouched, with a longer idle limit
				self._read_write(out_socket, 300, True)
		finally:
			out_socket.close()
			self.connection.close()

	def do_GETandPOST(self, data = None):
		"""Forward the current request upstream and relay the response.

		data is an optional request body that is sent after the headers.
		Only absolute http URLs without a fragment are accepted; anything
		else is answered with 400.
		"""
		protocol, host_info, path, params, query, fragment = urlparse(self.path, 'http')
		if protocol != 'http' or fragment or not host_info:
			self.send_error(400, 'bad url %s' % self.path)
			return
		out_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		try:
			if self._connect_to(host_info, out_socket):
				#a URL with no path must still yield a valid request target
				request = urlunparse(('', '', path or '/', params, query, ''))
				self.headers['Connection'] = 'close'
				del self.headers['Proxy-Connection']
				#dropped so the upstream reply is not sent compressed
				del self.headers['Accept-Encoding']
				head = ['%s %s %s\r\n' % (self.command, request, self.request_version)]
				for key_val in self.headers.items():
					head.append('%s: %s\r\n' % key_val)
				head.append('\r\n')
				#sendall, since send may transmit only part of the buffer
				out_socket.sendall(''.join(head))
				if data:
					out_socket.sendall(data)
				self._read_write(out_socket)
		finally:
			out_socket.close()
			self.connection.close()

	def do_GET(self):
		"""Forward a request that carries no body."""
		self.do_GETandPOST()

	def do_POST(self):
		"""Read the request body, then forward the request with it.

		With a Content-Length header exactly that many bytes are read (400
		is sent for an invalid length). Without one, whatever can be read
		from the client without blocking is used.
		"""
		if 'Content-Length' in self.headers:
			try:
				length = int(self.headers['Content-Length'])
			except ValueError:
				length = -1
			if length < 0:
				self.send_error(400, 'bad Content-Length')
				return
			data = self.rfile.read(length)
		else:
			self.connection.setblocking(0)
			try:
				try:
					data = self.rfile.read()
				except IOError:
					data = ''
			finally:
				self.connection.setblocking(1)
		self.do_GETandPOST(data)

	def _read_write(self, out_socket, max_idle=20, https=False):
		"""Relay data between the client and out_socket until both go idle.

		max_idle is the number of consecutive select rounds (3 second
		timeout each) without data after which buffered text is judged and
		the loop ends. https=True relays upstream data immediately because
		it cannot be content filtered.

		Upstream data is buffered until the response headers are complete.
		If they name a content type outside filter_types the buffer is
		flushed and later data is relayed directly. Otherwise everything is
		held until the idle limit, then either sent on or replaced by the
		content-blocked page.
		"""
		filter_data = ''
		input_socks = [self.connection, out_socket]
		count = 0
		content_type = ''
		headers_parsed = False
		while 1:
			count += 1
			input_ready, _, exceptions = select.select(input_socks, [], input_socks, 3)
			if exceptions:
				break
			for input_sock in input_ready:
				data = input_sock.recv(8192)
				if not data:
					#a closed peer stays readable, so count reaches
					#max_idle almost at once and the loop winds down
					continue
				count = 0
				if input_sock is not out_socket:
					out_socket.sendall(data)
				elif https or (content_type and content_type not in filter_types):
					#send right away, nothing here is content filtered
					self.connection.sendall(data)
				else:
					filter_data += data
					if not headers_parsed and '\r\n\r\n' in filter_data:
						#look at the header block only, and only once
						headers_parsed = True
						header_block = filter_data.split('\r\n\r\n', 1)[0]
						content_type = self._sniff_content_type(header_block)
						if content_type and content_type not in filter_types:
							#not something we filter: release what is buffered
							self.connection.sendall(filter_data)
							filter_data = ''
			if count == max_idle:
				if not filter_data:
					break
				if self.do_bayesian_check and self.server.bayes.isBad(filter_data):
					self._send_block_page('content_blocked_page')
				else:
					self.connection.sendall(filter_data)
				filter_data = ''
				count = 0

	do_HEAD = do_GET
	#PUT carries a request body, so it needs the same forwarding as POST
	do_PUT = do_POST
	do_DELETE = do_GET

class ForkingHTTPProxy (ForkingMixIn, HTTPServer):
	"""HTTP server that serves ProxyHandler requests in forked children.

	The filter objects and configuration passed to the constructor are
	stored on the server, where request handlers read them.
	"""

	def __init__(self, address, bayes, domain, config):
		"""Store the collaborators, then bind the server to address."""
		self.bayes, self.domain, self.config = bayes, domain, config
		HTTPServer.__init__(self, address, ProxyHandler)

	def serve_forever(self):
		"""Handle at most one pending request, then return True.

		Despite the name this does not loop: it waits up to one second for
		the listening socket to become readable and handles a single request
		if it does. Presumably it is meant to be called repeatedly from an
		outer main loop - verify against the caller. On KeyboardInterrupt
		the bayes object is cleaned up and the process exits.
		"""
		try:
			listening = [self.socket.fileno()]
			pending = select.select(listening, [], [], 1)[0]
			if pending:
				self.handle_request()
		except KeyboardInterrupt:
			self.bayes.cleanup()
			#we want to quit now, not return to the mainloop
			raise SystemExit()
		return True
