#!/usr/bin/env python
# Anomalous.py
# Maintained by G.Winter
# 18th November 2004
# 
# A Jiffy to parse the ANOMPLOT and NORMPLOT output files from Scala
# and calculate the strength of the anomalous signal on a rather arbitrary
# scale - which can then be used to decide if there is enough signal in there
# to find heavy atoms (this will be fun to implement!)
# 
# $Id: Anomalous.py,v 1.3 2005/11/22 13:38:14 svensson Exp $

import math

# Absolute slack applied to x values: truncate() and regrid() widen their
# bounds by this much, and average() treats abscissae closer than this as
# being on the same grid point.
tolerance = 0.001

def interpolate(list, xvalue):
    '''Calculate a yvalue from list linearly interpolated at xvalue.

    list is a sequence of (x, y) pairs which is assumed to be sorted on x,
    and xvalue must lie within the range list[0][0] to list[-1][0]
    (inclusive).

    Returns the interpolated y value.

    Raises RuntimeError('Domain error') if list is empty or if xvalue lies
    outside the tabulated range.'''

    if len(list) == 0:
        raise RuntimeError('Domain error')
    if list[0][0] > xvalue:
        raise RuntimeError('Domain error')
    if list[-1][0] < xvalue:
        raise RuntimeError('Domain error')

    # find the first point at or beyond xvalue - the range checks above
    # guarantee that there is one
    i = 0
    while list[i][0] < xvalue:
        i += 1

    if i == 0:
        # xvalue sits exactly on the first point: there is no segment to the
        # left, and list[i - 1] would wrap round to the *last* point (which
        # divides by zero for a list with a single entry)
        return list[0][1]

    x0 = list[i - 1][0]
    y0 = list[i - 1][1]
    x1 = list[i][0]
    y1 = list[i][1]

    # force floating point division so that integer input is not truncated
    gradient = float(y1 - y0) / (x1 - x0)

    return y0 + (xvalue - x0) * gradient

def resort(list):
    '''Return the (x, y) pairs of list ordered by increasing x.

    If list is empty, or its x values are already in non-decreasing order,
    the very same list object is handed back untouched. Otherwise a new list
    of (x, y) tuples is built via a dictionary keyed on x - so where an x
    value is repeated only the last y seen for it is kept.'''

    if len(list) == 0:
        return list

    # check the order - if already sorted don't resort
    in_order = True
    previous = list[0][0]
    for pair in list:
        if previous > pair[0]:
            in_order = False
            break
        previous = pair[0]

    if in_order:
        return list

    lookup = { }
    for pair in list:
        lookup[pair[0]] = pair[1]

    # sorted() rather than keys().sort(), which only works where keys()
    # returns a real list
    return [(x, lookup[x]) for x in sorted(lookup)]
    
def limits(list):
    '''Determine the lower and upper x limits of the list.

    list is a non-empty sequence of (x, y) pairs in non-decreasing x order.

    Returns the tuple (first x, last x).

    Raises RuntimeError('Input list not sorted') if any x is smaller than
    the one before it.'''

    # check the order
    previous = list[0][0]
    for pair in list:
        if previous > pair[0]:
            raise RuntimeError('Input list not sorted')
        previous = pair[0]

    return list[0][0], list[-1][0]

def truncate(list, min, max):
    '''Make a new list from the middle of an old one.

    list is a non-empty sequence of (x, y) pairs in non-decreasing x order.
    The result holds every pair whose x lies between min and max, with both
    ends widened by the module level tolerance.

    Raises RuntimeError('Input list not sorted') if list is out of order and
    RuntimeError('Domain error in truncate') if min or max falls outside
    the x range of list.'''

    # check the order
    previous = list[0][0]
    for pair in list:
        if previous > pair[0]:
            raise RuntimeError('Input list not sorted')
        previous = pair[0]

    if min < list[0][0]:
        raise RuntimeError('Domain error in truncate')
    if max > list[-1][0]:
        raise RuntimeError('Domain error in truncate')

    lower = min - tolerance
    upper = max + tolerance

    # the list is sorted, so filtering selects the same contiguous run that
    # walking an index would - without running off the end of the list when
    # the last entry is itself inside the window (e.g. max == list[-1][0])
    return [pair for pair in list if lower <= pair[0] <= upper]

def regrid(list, min, max, spacing):
    '''Create a new list which has the values from list regridded using
    interpolate between min and max (inclusive) with spacing spacing.

    list is a non-empty sequence of (x, y) pairs in non-decreasing x order.
    Grid points start at min and advance by spacing for as long as they do
    not exceed max plus the module level tolerance.

    Returns a list of (x, interpolated y) tuples.

    Raises RuntimeError('Input list not sorted') if list is out of order and
    RuntimeError('Domain error in regrid') if min or max falls outside the
    x range of list, or if spacing is not positive.'''

    # check the order
    previous = list[0][0]
    for pair in list:
        if previous > pair[0]:
            raise RuntimeError('Input list not sorted')
        previous = pair[0]

    if min < list[0][0]:
        raise RuntimeError('Domain error in regrid')
    if max > list[-1][0]:
        raise RuntimeError('Domain error in regrid')
    if spacing <= 0:
        # a step which never advances would loop for ever
        raise RuntimeError('Domain error in regrid')

    last_x = list[-1][0]
    limit = max + tolerance

    x = min
    grid = []
    while x <= limit:
        if x > last_x:
            # accumulated rounding (or the tolerance itself) has carried x
            # a hair beyond the tabulated range, where interpolate would
            # refuse to work - evaluate at the end point instead
            grid.append((x, interpolate(list, last_x)))
        else:
            grid.append((x, interpolate(list, x)))
        x += spacing

    return grid

def average(list1, list2):
    '''Average together two lists to make a new list - note that both lists
    must have the same x values.

    list1 and list2 are sequences of (x, y) pairs of equal length whose x
    values agree, entry by entry, to within the module level tolerance.

    Returns a list of (mean x, mean y) tuples.

    Raises RuntimeError('lists different length') or
    RuntimeError('lists different grid spacing') if the lists do not match
    up.'''

    if len(list1) != len(list2):
        raise RuntimeError('lists different length')

    # validate the whole grid before building anything
    for first, second in zip(list1, list2):
        if not math.fabs(first[0] - second[0]) < tolerance:
            raise RuntimeError('lists different grid spacing')

    return [(0.5 * (first[0] + second[0]), 0.5 * (first[1] + second[1]))
            for first, second in zip(list1, list2)]

def chi_sq(list):
    '''Score how far a list of (x, y) pairs departs from the line y = x.

    Writing d = y - x (observed minus expected) for each point, every pair
    of neighbouring points contributes

        0.5 * (x_next - x_this) * (d_next + d_this) ** 2

    and the contributions are summed. A list with fewer than two points
    scores 0.0.

    NOTE(review): this squares the *sum* of the two end residuals, which is
    not the textbook trapezium rule for d ** 2 - left exactly as it was
    since the results are only used on an arbitrary scale.'''

    total = 0.0

    # walk over neighbouring (this, next) pairs of points
    for this, following in zip(list, list[1:]):
        residuals = (following[1] - following[0]) + (this[1] - this[0])
        width = following[0] - this[0]
        total += 0.5 * width * residuals * residuals

    return total

class Plot:
    '''A class to represent the data in an ANOMPLOT or a NORMPLOT
    in the format used by xmgrace'''

    def __init__(self, filename):
        '''Instantiate the class with some data from a file'''

        # dictionary of legend -> list of (x, y) pairs, plus a 'runs' entry
        self.data = ParsePLOT(filename)

    def getRuns(self):
        '''Return the 'runs' entry recorded by ParsePLOT.'''
        return self.data['runs']

    def get(self, this):
        '''Return the data set stored under the legend this, sorted on x and
        regridded in steps of 0.1 over the widest whole-number interval
        which lies inside the data, or None if there is no such data set.'''

        if this not in self.data:
            return None

        # sort once and use the result for both the limits and the regrid
        points = resort(self.data[this])
        lower, upper = limits(points)

        # round *inwards* to whole numbers: int() truncates towards zero,
        # which lands outside the data (and so fails in regrid) for a
        # positive lower limit or a negative upper limit
        lower = 1.0 * int(math.ceil(lower))
        upper = 1.0 * int(math.floor(upper))

        return regrid(points, lower, upper, 0.1)

def ParsePLOT(plotfile):
    '''Parse a plot file to a dictionary of lists of pairs of numbers.

    The file named plotfile is read line by line:

      "@  legend on"           clears the current legend
      "@  legend off"          sets the current legend to 'no legend'
      "@  legend string" ...   sets the current legend to the text between
                               the first pair of double quotes
      "@type" ...              starts recording (only if a legend is set)
      "&"                      stops recording and files the recorded pairs
                               under the current legend (if any)

    While recording, the first two tokens on each line are stored as an
    (x, y) pair of floats. Blank lines are ignored.

    Returns a dictionary mapping each legend to its list of (x, y) tuples,
    plus the key 'runs' holding the largest n found in any legend of the
    form "Run n: ..." (0 if there were none).'''

    result = { }
    pairlist = [ ]

    recording = False

    legend = None

    runs = 0

    # read everything up front so that the file is closed again even if the
    # parsing below goes wrong
    handle = open(plotfile, 'r')
    try:
        lines = handle.readlines()
    finally:
        handle.close()

    for line in lines:
        tokens = line.split()

        if not tokens:
            # blank line - there is no first token to inspect
            continue

        if line.startswith('@  legend on'):
            legend = None
        if line.startswith('@  legend off'):
            legend = 'no legend'
        if line.startswith('@  legend string'):
            legend = line.split('"')[1]

            if legend[:3] == 'Run':
                this_run = int(legend.split(' ')[1].replace(':', ''))
                if this_run > runs:
                    runs = this_run

        if tokens[0] == '&':
            # end of a data set
            recording = False
            if legend:
                result[legend] = pairlist
            pairlist = []
            legend = None

        if recording:
            pairlist.append((float(tokens[0]), float(tokens[1])))

        # tested after the recording step so that the "@type" line itself
        # is never taken for data
        if tokens[0] == '@type' and legend:
            recording = True

    result['runs'] = runs

    return result

def AnalyseNPPAnom(normfile, anomfile):
    '''Score the normal probability plots held in normfile and anomfile.

    Returns a tuple of six values:

      chi_sq of the mean of the fulls and the summed partials,
      chi_sq of the fulls,
      chi_sq of the summed partials,
      chi_sq of the anomalous plot,
      the lowest and the highest x of the grid which was scored.

    If normfile contains any numbered runs nothing is calculated and the
    placeholder (0.0, 0.0, 0.0, 0.0, -1.0, 1.0) comes back. If there are no
    fulls the score for the partials stands in for the mean, the score for
    the fulls is 0 and the limits are those of the partials.

    NOTE(review): a missing 'summed partials' or 'no legend' data set, or
    fulls which span more grid points than the partials, are not catered
    for here and will surface as exceptions.'''

    normal = Plot(normfile)

    if normal.getRuns() > 0:
        # multiple runs are stored as "Run n: fulls" etc. - these are not
        # analysed, a fixed answer is returned instead
        return 0.0, 0.0, 0.0, 0.0, -1.0, 1.0

    fulls = normal.get('fulls')

    if not fulls:
        # nothing but partials in this data set
        partials = normal.get('summed partials')
        anomalous = Plot(anomfile).get('no legend')

        return chi_sq(partials), 0, chi_sq(partials), chi_sq(anomalous), \
               partials[0][0], partials[-1][0]

    # there were some full reflections
    first_x = fulls[0][0]
    last_x = fulls[-1][0]

    partials = normal.get('summed partials')
    if len(partials) > len(fulls):
        # the partials are more spread out - cut them down to the fulls
        partials = truncate(partials, first_x, last_x)

    anomalous = Plot(anomfile).get('no legend')
    if len(anomalous) > len(fulls):
        # likewise cut the anomalous plot down to the fulls
        anomalous = truncate(anomalous, first_x, last_x)

    combined = average(fulls, partials)

    return chi_sq(combined), chi_sq(fulls), chi_sq(partials), \
           chi_sq(anomalous), fulls[0][0], fulls[-1][0]
    
if __name__ == '__main__':

    import sys

    # the original test data set is used unless both plot files are named
    # on the command line: Anomalous.py NORMPLOT ANOMPLOT
    normfile = '/home/graeme/process/data/insulin/scale/NORMPLOT'
    anomfile = '/home/graeme/process/data/insulin/scale/ANOMPLOT'

    if len(sys.argv) == 3:
        normfile = sys.argv[1]
        anomfile = sys.argv[2]

    # print(...) with a single argument behaves the same whether print is a
    # statement or a function
    print(AnalyseNPPAnom(normfile, anomfile))

