Least Significant Difference (LSD) in Python

http://www.github.com/matti-kariluoma-ndsu/wheat-dissem/blob/kariluoma/doc/LSD.py

The Least Significant Difference (LSD) is a measure of how large a difference between two means must be observed before the means can be called significantly different. For example, two means that differ by 0.5 are not significantly different if their LSD is 1.0, but they are significantly different if their LSD is 0.1.

#!/usr/bin/python
# coding=utf8
#
# Python routines to calculate the LSD of a balanced data set.
#
# Taken line-by-line from the agricolae project
# (http://cran.r-project.org/web/packages/agricolae/index.html ,
#  http://tarwi.lamolina.edu.pe/~fmendiburu) for R
# (http://r-project.org)
#
# Matti Kariluoma Sep 2011 

from math import sqrt, pi, cos, sin, exp
from scipy.special import erfinv

def qnorm(probability):
	"""
	A reimplementation of R's qnorm() function.

	Calculates the quantile function (inverse CDF) of the standard
	normal distribution, using the identity
	Phi^-1(p) = sqrt(2) * erfinv(2p - 1).
	(http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function)

	Requires the erfinv() function, the inverse error function.
	(http://en.wikipedia.org/wiki/Error_function#Inverse_function)

	probability: a value in the interval (0, 1].
	Returns the standard normal quantile as a float.
	Raises ValueError when probability is outside (0, 1].
	"""
	# Match R's domain: qnorm(1) is +Inf, qnorm(p <= 0) is an error.
	if probability > 1 or probability <= 0:
		raise ValueError("probability must be in the interval (0, 1]")
	return sqrt(2) * erfinv(2*probability - 1)

def qt(probability, degrees_of_freedom):
	"""
	A reimplementation of R's qt() function for a two-tailed probability.

	Calculates the quantile function of the Student's t distribution:
	given the two-tailed probability P, returns the t-value that R's
	qt(1 - P/2, df) would return (e.g. qt(0.05, 2) ~= 4.3027).
	(http://en.wikipedia.org/wiki/Quantile_function#The_Student.27s_t-distribution)

	This algorithm has been taken from Hill, G. W. (1970)
	Algorithm 396: Student's t-quantiles. Communications of the ACM,
	13(10), 619-620.

	Currently unimplemented are the improvements to Algorithm 396 from
	Hill, G. W. (1981) Remark on Algorithm 396, ACM Transactions on
	Mathematical Software, 7, 250-1.

	probability: two-tailed probability, in the interval (0.0, 1.0].
	degrees_of_freedom: a positive integer.
	Raises ValueError when either argument is out of range.
	"""
	n = degrees_of_freedom
	P = probability
	if n < 1 or P > 1.0 or P <= 0.0:
		raise ValueError(
			"probability must be in (0.0, 1.0] and degrees_of_freedom >= 1")

	if n == 1:
		# df == 1 is the Cauchy distribution; its two-tail quantile is
		# exactly cot(P*pi/2).
		angle = P * pi / 2.0
		return cos(angle) / sin(angle)

	if n == 2:
		# Exact closed form for two degrees of freedom.
		return sqrt(2.0 / (P * (2.0 - P)) - 2.0)

	# General case (n > 2): Hill's expansion about the normal quantile.
	a = 1.0 / (n - 0.5)
	b = 48.0 / (a ** 2.0)
	c = (((20700.0 * a) / b - 98.0) * a - 16.0) * a + 96.36
	d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt((a * pi) / 2.0) * float(n)
	x = d * P
	y = x ** (2.0 / float(n))

	if y > 0.05 + a:
		# Asymptotic inverse expansion about the normal quantile.
		# qnorm(P/2) = sqrt(2) * erfinv(2*(P/2) - 1) = sqrt(2) * erfinv(P - 1)
		x = sqrt(2.0) * erfinv(P - 1.0)
		y = x ** 2.0

		if n < 5:
			c = c + 0.3 * (float(n) - 4.5) * (x + 0.6)

		c = (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b + c
		y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0)
			/ b + 1.0) * x
		y = a * (y ** 2.0)

		if y > 0.002:
			y = exp(y) - 1.0
		else:
			# Second-order Taylor expansion of exp(y) - 1 for tiny y.
			y = 0.5 * (y ** 2.0) + y
	else:
		# Power-series expansion for the far tail (very small P).
		y = ((1.0 / (((float(n) + 6.0) / (float(n) * y) - 0.089 * d - 0.822)
			* (float(n) + 2.0) * 3.0) + 0.5 / (float(n) + 4.0)) * y - 1.0) \
			* (float(n) + 1.0) / (float(n) + 2.0) + 1.0 / y

	return sqrt(float(n) * y)

def LSD(response_to_treatments, probability):
	"""
	A stripped-down reimplementation of LSD.test from the agricolae
	package. (http://cran.r-project.org/web/packages/agricolae/index.html)

	Calculates the Least Significant Difference of a multiple comparisons
	trial, over a balanced dataset.

	response_to_treatments: an n x k table (sequence of sequences) of
		responses: n treatments (rows), each with k responses. The design
		must be balanced, i.e. every row has the same length k.
	probability: the two-tailed significance level passed to qt(),
		e.g. 0.05.
	Returns the LSD as a float.
	Raises ValueError for an empty or unbalanced dataset, or when the
		design leaves no residual degrees of freedom.
	"""
	trt = response_to_treatments
	# n treatments (factors), k responses per treatment.
	n = len(trt)
	if n == 0:
		raise ValueError("response_to_treatments must not be empty")
	k = len(trt[0])
	if any(len(row) != k for row in trt):
		raise ValueError(
			"response_to_treatments must be balanced (all rows the same length)")
	# Residual degrees of freedom of the additive two-way model
	# (treatment x block), as the commented-out aov()/df.residual() in the
	# original R code would compute for a balanced design.
	degrees_freedom_of_error = (n - 1) * (k - 1)
	if degrees_freedom_of_error < 1:
		raise ValueError(
			"need at least 2 treatments and 2 responses per treatment")

	treatment_means = [sum(float(v) for v in row) / k for row in trt]
	block_means = [sum(float(trt[i][j]) for i in range(n)) / n
		for j in range(k)]
	grand_mean = sum(treatment_means) / float(n)

	# Residual sum of squares of the model y_ij = mu + t_i + b_j.
	# TODO: what is the difference between type I and type III SS?
	# (http://www.statmethods.net/stats/anova.html)
	SSE = 0.0
	for i in range(n):
		for j in range(k):
			SSE += (float(trt[i][j]) - treatment_means[i]
				- block_means[j] + grand_mean) ** 2.0

	mean_squares_of_error = SSE / degrees_freedom_of_error

	# Two-tailed t critical value for the residual degrees of freedom.
	t_value = qt(probability, degrees_freedom_of_error)

	# Each treatment mean is an average of k responses, hence the
	# standard error of a difference of two means is sqrt(2*MSE/k).
	return t_value * sqrt(2.0 * mean_squares_of_error / k)

References:
http://cran.r-project.org/web/packages/agricolae/index.html
http://tarwi.lamolina.edu.pe/~fmendiburu
http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function
http://en.wikipedia.org/wiki/Error_function#Inverse_function
http://en.wikipedia.org/wiki/Quantile_function#The_Student.27s_t-distribution
Hill, G. W. (1970) Algorithm 396: Student’s t-quantiles. Communications of the ACM, 13(10), 619-620.
Hill, G. W. (1981) Remark on Algorithm 396, ACM Transactions on Mathematical Software, 7, 250-1.


Leave a Comment?

Send me an email, then I'll place our discussion on this page (with your permission).


Return | About/Contact