# ----------------------------------------------------------
# AdvaS Advanced Search 
# module dealing with document descriptors
#
# (C) 2002 - 2005 Frank Hofmann, Chemnitz, Germany
# email fh@efho.de
# ----------------------------------------------------------

# changed 2004-11-16

import math
from basicLists import convert_list_into_dictionary

def calc_rsv (d, p, q):
	"""Calculate the document weight (return status value, rsv) for document descriptors.

	d    -- list of existence flags, one per descriptor:
	        1 (descriptor exists) or 0 (descriptor does not exist)
	p, q -- lists of probabilities of existence (p) and non-existence (q)

	Returns the sum over all positions i of

	    d[i] * log( (p[i] / (1 - p[i])) / (q[i] / (1 - q[i])) )

	Returns 0 if d, p and q do not all have the same length.

	Raises ZeroDivisionError if some p[i] or q[i] equals 1, or some q[i]
	equals 0, and ValueError if the ratio passed to the logarithm is not
	positive (e.g. p[i] equals 0).
	"""

	# all three lists have to describe the same set of descriptors
	# ("!=" instead of "<>": the latter does not exist in Python 3)
	if not (len(d) == len(p) == len(q)):
		return 0
	# end if

	# define rsv
	rsv = 0

	for d_i, p_i, q_i in zip(d, p, q):
		# odds of existence and of non-existence; the explicit float()
		# conversions guard against integer division on Python 2
		eq_upper = float(p_i) / float(1 - p_i)
		eq_lower = float(q_i) / float(1 - q_i)

		# the log odds ratio only counts for descriptors flagged in d
		rsv = rsv + float(d_i * math.log(eq_upper / eq_lower))
	# end for

	# return rsv
	return rsv

def comp_descriptors (request, document):
	"""Return the degree of equality between two descriptor lists.

	request, document -- lists of descriptors (often a request and a document)

	The result is the number of keys of the request dictionary that are
	also keys of the document dictionary, divided by
	sqrt(len(request)) * sqrt(len(document)).
	Returns 0.0 if either list is empty.

	NOTE(review): convert_list_into_dictionary is defined in basicLists and
	is presumably building a dictionary keyed by the list items -- confirm.
	"""

	# calc number of items
	items_request = len(request)
	items_document = len(document)

	# an empty list would make the denominator below zero;
	# treat it as "no equality" instead of raising ZeroDivisionError
	if items_request == 0 or items_document == 0:
		return 0.0
	# end if

	# calc similar descriptors
	request_dict = convert_list_into_dictionary(request, 0)
	document_dict = convert_list_into_dictionary(document, 0)
	similar_descriptors = 0

	for item in request_dict:
		# "in" instead of has_key(): the latter does not exist in Python 3
		if item in document_dict:
			similar_descriptors += 1
		# end if
	# end for

	# calc equality
	equality = float(similar_descriptors) / (math.sqrt(items_request) * math.sqrt(items_document))

	# return degree of equality
	return equality


