# -------------------------------------------------------------------------
#     Copyright (C) 2005-2011 Martin Strohalm <www.mmass.org>

#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 3 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#     GNU General Public License for more details.

#     Complete text of GNU GPL can be found in the file LICENSE.TXT in the
#     main directory of the program
# -------------------------------------------------------------------------

#load libs
import time
import sys
import struct
import base64
import zlib
import copy
import xml.dom.minidom
import os.path
import re
import numpy

# load modules
import config
import mspy


# DOCUMENT STRUCTURE
# ------------------

class document():
    """Document object definition.

    Bundles descriptive metadata, one spectrum (``mspy.scan``), the lists of
    annotations and sequences, a few display flags and a single-level undo
    buffer. It can serialise itself to mSD XML (``msd``) and to an HTML
    report (``report``).
    """

    def __init__(self):

        # file info
        self.format = 'mSD'
        self.title = ''
        self.path = ''

        # description
        self.date = ''
        self.operator = ''
        self.contact = ''
        self.institution = ''
        self.instrument = ''
        self.notes = ''

        # data
        self.spectrum = mspy.scan()
        self.annotations = []
        self.sequences = []

        # state flags and display attributes (consumed outside this class)
        self.colour = (0,0,255)
        self.dirty = False
        self.visible = True
        self.flipped = False
        self.offset = [0,0]

        # undo buffers
        self.undo = None
        self.spectrumBuff = None
        self.annotationsBuff = None
        self.sequencesBuff = None
    # ----


    def backup(self, items=None):
        """Backup current state for undo.

        items: container of the names to store. Recognised names are
            'spectrum', 'annotations', 'sequences' and 'notations'
            ('notations' stores both annotations and sequences).
            A false value only clears the previous backup.
        """

        # NOTE(review): membership is tested with `in`, so if a plain string
        # is passed it becomes a substring test and 'annotations' also
        # matches 'notations' - confirm what callers pass.
        self.undo = items

        # delete old
        self.spectrumBuff = None
        self.annotationsBuff = None
        self.sequencesBuff = None

        if not items:
            return

        # store data (each buffer is copied once even if requested twice)
        notations = 'notations' in items
        if 'spectrum' in items:
            self.spectrumBuff = copy.deepcopy(self.spectrum)
        if notations or 'annotations' in items:
            self.annotationsBuff = copy.deepcopy(self.annotations)
        if notations or 'sequences' in items:
            self.sequencesBuff = copy.deepcopy(self.sequences)
    # ----


    def restore(self):
        """Revert to last stored state.

        Returns the `items` value given to the last backup(), or False if
        there is nothing to restore. The undo buffers are cleared afterwards.
        """

        # check undo
        if not self.undo:
            return False

        # revert data (lists are refilled in place so their identity is kept)
        items = self.undo
        if 'spectrum' in items:
            self.spectrum = self.spectrumBuff
        if 'annotations' in items:
            self.annotations[:] = self.annotationsBuff[:]
        if 'sequences' in items:
            self.sequences[:] = self.sequencesBuff[:]
        if 'notations' in items:
            self.annotations[:] = self.annotationsBuff[:]
            # only matches are reverted here; pairing by zip avoids an
            # IndexError if sequences were added since the backup
            for current, stored in zip(self.sequences, self.sequencesBuff):
                current.matches[:] = stored.matches[:]

        # clear buffers
        self.undo = None
        self.spectrumBuff = None
        self.annotationsBuff = None
        self.sequencesBuff = None

        return items
    # ----


    def sortAnnotations(self):
        """Sort annotations by m/z.

        The list is sorted in place. The sort is stable and compares the m/z
        values only, so items with equal m/z keep their relative order and
        the annotation objects themselves are never compared.
        """

        self.annotations[:] = sorted(self.annotations, key=lambda item: item.mz)
    # ----


    def sortMatches(self):
        """Sort matches of every sequence by m/z (in place, stable)."""

        for sequence in self.sequences:
            sequence.matches[:] = sorted(sequence.matches, key=lambda item: item.mz)
    # ----


    def msd(self):
        """Make mSD XML.

        Returns the whole document (description, spectrum, peaklist,
        annotations and sequences) as one mSD version 2.1 XML string.
        """

        buff = []
        add = buff.append

        add('<?xml version="1.0" encoding="utf-8" ?>\n')
        add('<mSD version="2.1">\n\n')

        # format description
        add('  <description>\n')
        add('    <title>%s</title>\n' % self._escape(self.title))
        add('    <date value="%s" />\n' % self._escape(self.date))
        add('    <operator value="%s" />\n' % self._escape(self.operator))
        add('    <contact value="%s" />\n' % self._escape(self.contact))
        add('    <institution value="%s" />\n' % self._escape(self.institution))
        add('    <instrument value="%s" />\n' % self._escape(self.instrument))
        add('    <notes>%s</notes>\n' % self._escape(self.notes))
        add('  </description>\n\n')

        # format spectrum
        endian = sys.byteorder
        points = self.spectrum.points
        mzArray, intArray = self._convertSpectrum(points)
        attributes = 'points="%s"' % len(points)
        for name in ('scanNumber', 'msLevel', 'retentionTime', 'precursorMZ', 'precursorCharge', 'polarity'):
            value = getattr(self.spectrum, name)
            if value is not None:
                attributes += ' %s="%s"' % (name, value)

        add('  <spectrum %s>\n' % attributes)
        if len(points) > 0:
            add('    <mzArray precision="32" compression="zlib" endian="%s">%s</mzArray>\n' % (endian, mzArray))
            add('    <intArray precision="32" compression="zlib" endian="%s">%s</intArray>\n' % (endian, intArray))
        add('  </spectrum>\n\n')

        # format peaklist
        add('  <peaklist>\n')
        for peak in self.spectrum.peaklist:
            attributes = 'mz="%.6f" intensity="%.6f" baseline="%.6f"' % (peak.mz, peak.intensity, peak.baseline)
            if peak.sn is not None:
                attributes += ' sn="%.3f"' % peak.sn
            if peak.charge is not None:
                attributes += ' charge="%d"' % peak.charge
            if peak.isotope is not None:
                attributes += ' isotope="%d"' % peak.isotope
            if peak.fwhm is not None:
                attributes += ' fwhm="%.6f"' % peak.fwhm
            add('    <peak %s />\n' % attributes)
        add('  </peaklist>\n\n')

        # format annotations
        add('  <annotations>\n')
        for annot in self.annotations:
            attributes = self._formatAssignmentAttributes(annot)
            add('    <annotation %s>%s</annotation>\n' % (attributes, self._escape(annot.label)))
        add('  </annotations>\n\n')

        # format sequences
        add('  <sequences>\n\n')
        for index, sequence in enumerate(self.sequences):
            add('    <sequence %s>\n' % ('index="%d"' % index))
            add('      <title>%s</title>\n' % self._escape(sequence.title))
            add('      <seq>%s</seq>\n' % sequence.format('S'))

            # format modifications
            add('      <modifications>\n')
            for mod in sequence.modifications:
                gainFormula = mspy.modifications[mod[0]].gainFormula
                lossFormula = mspy.modifications[mod[0]].lossFormula
                modtype = 'fixed'
                if mod[2] == 'v':
                    modtype = 'variable'
                add('        <modification name="%s" position="%s" type="%s" gainFormula="%s" lossFormula="%s" />\n' % (mod[0], mod[1], modtype, gainFormula, lossFormula))
            add('      </modifications>\n')

            # format matches
            add('      <matches>\n')
            for match in sequence.matches:
                attributes = self._formatAssignmentAttributes(match)
                if match.sequenceRange is not None:
                    attributes += ' sequenceRange="%d-%d"' % tuple(match.sequenceRange)
                if match.fragmentSerie is not None:
                    attributes += ' fragmentSerie="%s"' % match.fragmentSerie
                if match.fragmentIndex is not None:
                    attributes += ' fragmentIndex="%s"' % match.fragmentIndex
                add('        <match %s>%s</match>\n' % (attributes, self._escape(match.label)))
            add('      </matches>\n')

            add('    </sequence>\n\n')
        add('  </sequences>\n\n')

        add('</mSD>\n')

        return ''.join(buff)
    # ----


    def report(self, image=None):
        """Get HTML report.

        image: if true, an <img> tag referring to 'mmass_spectrum.png' is
            included (the image file itself is not written here).

        Returns the report as one HTML string starting with REPORT_HEADER.
        """

        # NOTE(review): title, notes, labels etc. are inserted into the HTML
        # without escaping - confirm whether markup in these fields is wanted.

        mzFormat = '%%0.%df' % config.main['mzDigits']
        ppmFormat = '%%0.%df' % config.main['ppmDigits']

        # add header
        buff = REPORT_HEADER

        # add basic file info
        scanNumber = ''
        retentionTime = ''
        msLevel = ''
        precursorMZ = ''
        polarity = 'unknown'
        points = len(self.spectrum.points)
        peaks = len(self.spectrum.peaklist)

        # base peak height above baseline, used for relative intensities
        basePeak = self.spectrum.peaklist.basePeak
        if basePeak:
            basePeak = basePeak.intensity - basePeak.baseline

        if self.spectrum.scanNumber is not None:
            scanNumber = self.spectrum.scanNumber
        if self.spectrum.retentionTime is not None:
            retentionTime = self.spectrum.retentionTime
        if self.spectrum.msLevel is not None:
            msLevel = self.spectrum.msLevel
        if self.spectrum.precursorMZ is not None:
            precursorMZ = self.spectrum.precursorMZ

        if self.spectrum.polarity == 1:
            polarity = 'positive'
        elif self.spectrum.polarity == -1:
            polarity = 'negative'

        buff += '  <h1>mMass Report: <span>%s</span></h1>\n' % self.title
        buff += '  <table id="tableMainInfo">\n'
        buff += '    <tbody>\n'
        buff += '      <tr><th>Date</th><td>%s</td><th>Scan Number</th><td>%s</td></tr>\n' % (self.date, scanNumber)
        buff += '      <tr><th>Operator</th><td>%s</td><th>Retention Time</th><td>%s</td></tr>\n' % (self.operator, retentionTime)
        buff += '      <tr><th>Contact</th><td>%s</td><th>MS Level</th><td>%s</td></tr>\n' % (self.contact, msLevel)
        buff += '      <tr><th>Institution</th><td>%s</td><th>Precursor m/z</th><td>%s</td></tr>\n' % (self.institution, precursorMZ)
        buff += '      <tr><th>Instrument</th><td>%s</td><th>Polarity</th><td>%s</td></tr>\n' % (self.instrument, polarity)
        buff += '      <tr><th>&nbsp;</th><td>&nbsp;</td><th>Spectrum Points</th><td>%s</td></tr>\n' % (points)
        buff += '      <tr><th>&nbsp;</th><td>&nbsp;</td><th>Peak List</th><td>%s</td></tr>\n' % (peaks)
        buff += '    </tbody>\n'
        buff += '  </table>\n'

        # show spectrum (current time is appended as a query string)
        if image:
            buff += '  <div id="spectrum"><img src="mmass_spectrum.png?%s" alt="Mass Spectrum" width="600" height="400" /></div>\n' % time.time()

        # notes
        if self.notes:
            notes = self.notes.replace('\n', '<br />')
            buff += '  <h2>Notes</h2>\n'
            buff += '  <p id="notes">%s</p>\n' % notes

        # annotations
        if self.annotations:
            buff += '  <h2>Annotations</h2>\n'
            buff += '  <table id="tableAnnotations">\n'
            buff += self._formatReportTableHead('tableAnnotations1')
            for annot in self.annotations:
                label = self._replaceAnnotationIDs(annot.label)
                buff += self._formatReportRow(annot, label, basePeak, mzFormat, ppmFormat)
            buff += '    </tbody>\n'
            buff += '  </table>\n'

        # sequences
        if self.sequences:
            for x, sequence in enumerate(self.sequences):
                moMass = mzFormat % sequence.mass(0)
                avMass = mzFormat % sequence.mass(1)
                chain = self._formatSequence(sequence)
                coverage = self._getSequenceCoverage(sequence)
                tableID = 'tableSequenceMatches%d' % x

                buff += '  <h2>Sequence - <span>%s</span></h2>\n' % sequence.title
                buff += '  <table id="tableSequenceInfo">\n'
                buff += '    <thead>\n'
                buff += '      <tr><th>Aminoacids</th><th>Mo. Mass</th><th>Av. Mass</th><th>Coverage</th></tr>\n'
                buff += '    </thead>\n'
                buff += '    <tbody>\n'
                buff += '      <tr><td class="right">%s</td><td class="right">%s</td><td class="right">%s</td><td class="right">%s</td></tr>\n' % (len(sequence), moMass, avMass, coverage)
                buff += '      <tr><td colspan="4" class="sequence">%s</td></tr>\n' % chain
                buff += '    </tbody>\n'
                buff += '  </table>\n'

                if sequence.modifications:
                    buff += '  <table id="tableSequenceModifications">\n'
                    buff += '    <thead>\n'
                    buff += '      <tr><th>Position</th><th>Modification</th><th>Type</th><th>Mo.&nbsp;Mass</th><th>Av.&nbsp;Mass</th><th>Formula</th></tr>\n'
                    buff += '    </thead>\n'
                    buff += '    <tbody>\n'
                    for mod in self._formatModifications(sequence):
                        buff += '      <tr><td class="nowrap">%s</td><td>%s</td><td>%s</td><td class="right nowrap">%s</td><td class="right nowrap">%s</td><td class="nowrap">%s</td></tr>\n' % mod
                    buff += '    </tbody>\n'
                    buff += '  </table>\n'

                if sequence.matches:
                    buff += '  <table id="tableSequenceMatches">\n'
                    buff += self._formatReportTableHead(tableID)
                    for m in sequence.matches:
                        buff += self._formatReportRow(m, m.label, basePeak, mzFormat, ppmFormat)
                    buff += '    </tbody>\n'
                    buff += '  </table>\n'

        # add footer
        buff += '  <p id="footer">Generated by mMass &bull; Open Source Mass Spectrometry Tool &bull; <a href="http://www.mmass.org/" title="mMass homepage">www.mmass.org</a></p>\n'
        buff += '</body>\n'
        buff += '</html>'

        return buff
    # ----


    def _escape(self, text):
        """Strip the text and replace XML special characters by entities."""

        text = text.strip()

        # '&' must go first so the entities added below are not escaped again
        pairs = (
            ('&', '&amp;'),
            ('"', '&quot;'),
            ("'", '&#39;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
        )
        for char, entity in pairs:
            text = text.replace(char, entity)

        return text
    # ----


    def _convertSpectrum(self, spectrum):
        """Convert spectrum data to compressed binary format coded by base64.

        spectrum: iterable of points, point[0] being m/z and point[1]
            intensity.

        Returns (mzArray, intArray). Each is the base64 text of the
        zlib-compressed values packed as native 'f' (32-bit) floats.
        """

        # split points into two value lists in a single pass
        mzValues = []
        intValues = []
        for point in spectrum:
            mzValues.append(point[0])
            intValues.append(point[1])

        # convert data to binary; one pack call per array gives the same
        # bytes as packing each value separately but without repeated
        # concatenation
        pattern = '%df' % len(mzValues)
        mzArray = struct.pack(pattern, *mzValues)
        intArray = struct.pack(pattern, *intValues)

        # compress data by zlib and convert to ascii by base64
        mzArray = base64.b64encode(zlib.compress(mzArray))
        intArray = base64.b64encode(zlib.compress(intArray))

        # b64encode gives bytes on Python 3; make it text so it can be
        # inserted into the XML by '%s'
        if not isinstance(mzArray, str):
            mzArray = mzArray.decode('ascii')
            intArray = intArray.decode('ascii')

        return mzArray, intArray
    # ----


    def _formatAssignmentAttributes(self, item):
        """Format XML attributes shared by annotations and matches in msd()."""

        attributes = 'peakMZ="%.6f" peakIntensity="%.6f" peakBaseline="%.6f"' % (item.mz, item.intensity, item.baseline)
        if item.charge is not None:
            attributes += ' charge="%d"' % item.charge
        if item.radical:
            attributes += ' radical="1"'
        if item.theoretical is not None:
            attributes += ' calcMZ="%.6f"' % item.theoretical
        if item.formula is not None:
            attributes += ' formula="%s"' % item.formula

        return attributes
    # ----


    def _formatReportTableHead(self, tableID):
        """Format sortable head and opening <tbody> of a report peak table."""

        columns = (
            'Meas.&nbsp;m/z',
            'Calc.&nbsp;m/z',
            '&delta;&nbsp;(Da)',
            '&delta;&nbsp;(ppm)',
            'Rel.&nbsp;Int.&nbsp;(%)',
            'Annotation',
            'Formula',
        )

        buff = '    <thead>\n'
        buff += '      <tr>\n'
        for index, title in enumerate(columns):
            buff += '        <th><a href="" onclick="return sortTable(\'%s\', %d);" title="Sort by">%s</a></th>\n' % (tableID, index, title)
        buff += '      </tr>\n'
        buff += '    </thead>\n'
        buff += '    <tbody id="%s">\n' % tableID

        return buff
    # ----


    def _formatReportRow(self, item, label, basePeak, mzFormat, ppmFormat):
        """Format one annotation or match as a row of a report peak table.

        label is passed separately because annotation labels are processed
        by _replaceAnnotationIDs() first while match labels are used as is.
        """

        mz = mzFormat % item.mz
        relIntensity = ''
        theoretical = ''
        deltaDa = ''
        deltaPpm = ''
        formula = ''

        # a false basePeak (missing or zero height) leaves the cell empty
        if basePeak:
            relIntensity = '%0.2f' % (((item.intensity-item.baseline)/basePeak)*100)
        if item.theoretical:
            theoretical = mzFormat % item.theoretical
            deltaDa = mzFormat % item.delta('Da')
            deltaPpm = ppmFormat % item.delta('ppm')
        if item.formula:
            formula = item.formula

        return '      <tr><td class="right nowrap">%s</td><td class="right nowrap">%s</td><td class="right nowrap">%s</td><td class="right nowrap">%s</td><td class="right nowrap">%s</td><td>%s</td><td class="nowrap">%s</td></tr>\n' % (mz, theoretical, deltaDa, deltaPpm, relIntensity, label, formula)
    # ----


    def _formatSequence(self, sequence):
        """Format sequence for report.

        Residues that are modified and/or covered by a match range are
        wrapped in a <span> with the classes 'modified' / 'matched'.
        A space is added after every tenth residue.
        """

        # get coverage (sequenceRange is used as 1-based, inclusive)
        coverage = len(sequence)*[0]
        for m in sequence.matches:
            if m.sequenceRange:
                for i in range(m.sequenceRange[0]-1, m.sequenceRange[1]):
                    coverage[i] = 1

        # format sequence
        buff = ''
        for x, amino in enumerate(sequence):
            attributes = ''

            if sequence.isModified(x, True):
                attributes += 'modified '
            if coverage[x]:
                attributes += 'matched '
            if attributes:
                buff += '<span class="%s">%s</span>' % (attributes, amino)
            else:
                buff += amino

            if not (x+1) % 10:
                buff += ' '

        return buff
    # ----


    def _formatModifications(self, sequence):
        """Format sequence modifications for report.

        Returns a list of (position, name, type, mo. mass, av. mass, formula)
        string tuples, one per item of sequence.modifications.
        """

        buff = []

        massFormat = '%%0.%df' % config.main['mzDigits']
        for mod in sequence.modifications:
            name = mod[0]

            # format position (integer index or residue symbol)
            if isinstance(mod[1], int):
                position = '%s %s' % (sequence[mod[1]], mod[1]+1)
            else:
                position = 'All ' + mod[1]

            # format type
            if mod[2] == 'f':
                modtype = 'fixed'
            else:
                modtype = 'variable'

            # format masses
            massMo = massFormat % mspy.modifications[name].mass[0]
            massAv = massFormat % mspy.modifications[name].mass[1]

            # format formula
            formula = mspy.modifications[name].gainFormula
            if mspy.modifications[name].lossFormula:
                formula += ' - ' + mspy.modifications[name].lossFormula

            # append data
            buff.append((position, name, modtype, massMo, massAv, formula))

        return buff
    # ----


    def _getSequenceCoverage(self, sequence):
        """Get sequence coverage from matches as text like '12.3 %'."""

        # get ranges
        ranges = [m.sequenceRange for m in sequence.matches if m.sequenceRange is not None]

        # get coverage
        coverage = mspy.coverage(ranges, len(sequence))

        return '%.1f ' % coverage + '%'
    # ----


    def _replaceAnnotationIDs(self, label):
        """Replace IDs with links in annotations.

        Every pattern in config.replacements is applied to the label in turn.
        """

        # replace IDs
        for name in config.replacements:
            # remembered for _replaceIDs(), which re.sub calls with the match only
            self._currentReplacement = name
            label = re.sub(config.replacements[name]['pattern'], self._replaceIDs, label)

        return label
    # ----


    def _replaceIDs(self, matchobj):
        """Replace IDs to links in annotations.

        Callback for re.sub: group 1 is filled into the URL template of the
        current replacement, the whole match becomes the link text.
        """

        url = config.replacements[self._currentReplacement]['url'] % matchobj.group(1)
        return '<a href="%s" title="More information...">%s</a>' % (url, matchobj.group(0))
    # ----
    
    


# ANNOTATION OBJECT
# -----------------

class annotation():
    """Annotation object definition.

    Plain record of one labelled peak: the measured values (mz, intensity,
    baseline) plus optional charge, radical flag, theoretical m/z and formula.
    """

    def __init__(self, label, mz, intensity, baseline=0., charge=None, radical=None, theoretical=None, formula=None):

        # text shown for the peak
        self.label = label

        # measured peak
        self.mz = mz
        self.intensity = intensity
        self.baseline = baseline

        # optional assignment details
        self.charge = charge
        self.radical = radical
        self.theoretical = theoretical
        self.formula = formula
    # ----


    def delta(self, units):
        """Get error in specified units.

        Returns None if no theoretical m/z is set, otherwise the value of
        mspy.delta() for measured vs. theoretical m/z.
        """

        if self.theoretical is None:
            return None

        return mspy.delta(self.mz, self.theoretical, units)
    # ----
    # ----
    
    


# SEQUENCE MATCH OBJECT
# ---------------------

class match():
    """Match object definition.

    Same record as an annotation, extended by three sequence-related
    attributes (sequenceRange, fragmentSerie, fragmentIndex) which start as
    None and are filled in by the caller.
    """

    def __init__(self, label, mz, intensity, baseline=0., charge=None, radical=None, theoretical=None, formula=None):

        # text shown for the peak
        self.label = label

        # measured peak
        self.mz = mz
        self.intensity = intensity
        self.baseline = baseline

        # optional assignment details
        self.charge = charge
        self.radical = radical
        self.theoretical = theoretical
        self.formula = formula

        # sequence details, not settable through the constructor
        self.sequenceRange = None
        self.fragmentSerie = None
        self.fragmentIndex = None
    # ----


    def delta(self, units):
        """Get error in specified units.

        Returns None if no theoretical m/z is set, otherwise the value of
        mspy.delta() for measured vs. theoretical m/z.
        """

        if self.theoretical is None:
            return None

        return mspy.delta(self.mz, self.theoretical, units)
    # ----
    # ----
    
    


# MSD FORMAT PARSER
# -----------------

class parseMSD():
    """Parse data from mSD files."""
    
    def __init__(self, path):
        
        # parser state, file is parsed later on demand
        self.path = path
        self.version = None
        self.parsedData = None
        
        # init new document to be filled by handlers
        doc = document()
        doc.format = 'mSD'
        doc.path = path
        self.document = doc
    # ----
    
    
    def getDocument(self):
        """Get document."""
        
        # parse data
        if not self.parsedData:
            try: self.parsedData = xml.dom.minidom.parse(self.path)
            except: return False
        
        # get version
        if not self.version:
            self._getVersion()
        
        # get data
        if self.version == '1.0':
            self.handleDescription()
            self.handleSpectrum()
            self.handlePeaklist_10()
            self.handleSequences_10()
            dirName, fileName = os.path.split(self.path)
            self.document.title = fileName[:-4]
        else:
            self.handleDescription()
            self.handleSpectrum()
            self.handlePeaklist()
            self.handleAnnotations()
            self.handleSequences()
        
        return self.document
    # ----
    
    
    def getSequences(self):
        """Get list of available sequences."""
        
        # parse data
        if not self.parsedData:
            try: self.parsedData = xml.dom.minidom.parse(self.path)
            except: return False
        
        # get version
        if not self.version:
            self._getVersion()
        
        # set handler
        if self.version == '1.0':
            handler = self.handleSequence_10
        else:
            handler = self.handleSequence
        
        # get sequence
        data = []
        sequenceTags = self.parsedData.getElementsByTagName('sequence')
        if sequenceTags:
            for sequenceTag in sequenceTags:
                sequence = handler(sequenceTag)
                if sequence:
                    data.append(sequence)
        
        return data
    # ----
    
    
    
    # CURRENT HANDLERS
    
    def handleDescription(self):
        """Get document info."""
        
        # get description
        descriptionTags = self.parsedData.getElementsByTagName('description')
        if descriptionTags:
            
            titleTags = descriptionTags[0].getElementsByTagName('title')
            if titleTags:
                self.document.title = self._getNodeText(titleTags[0])
            
            dateTags = descriptionTags[0].getElementsByTagName('date')
            if dateTags:
                self.document.date = dateTags[0].getAttribute('value')
            
            operatorTags = descriptionTags[0].getElementsByTagName('operator')
            if operatorTags:
                self.document.operator = operatorTags[0].getAttribute('value')
            
            contactTags = descriptionTags[0].getElementsByTagName('contact')
            if contactTags:
                self.document.contact = contactTags[0].getAttribute('value')
            
            institutionTags = descriptionTags[0].getElementsByTagName('institution')
            if institutionTags:
                self.document.institution = institutionTags[0].getAttribute('value')
            
            instrumentTags = descriptionTags[0].getElementsByTagName('instrument')
            if instrumentTags:
                self.document.instrument = instrumentTags[0].getAttribute('value')
            
            notesTags = descriptionTags[0].getElementsByTagName('notes')
            if notesTags:
                self.document.notes = self._getNodeText(notesTags[0])
    # ----
    
    
    def handleSpectrum(self):
        """Get spectrum data."""
        
        # get spectrum
        spectrumTags = self.parsedData.getElementsByTagName('spectrum')
        if spectrumTags:
            
            # get metadata
            scanNumber = spectrumTags[0].getAttribute('scanNumber')
            if scanNumber:
                try: self.document.spectrum.scanNumber = int(scanNumber)
                except ValueError: pass
            
            msLevel = spectrumTags[0].getAttribute('msLevel')
            if msLevel:
                try: self.document.spectrum.msLevel = int(msLevel)
                except ValueError: pass
            
            retentionTime = spectrumTags[0].getAttribute('retentionTime')
            if retentionTime:
                try: self.document.spectrum.retentionTime = float(retentionTime)
                except ValueError: pass
            
            precursorMZ = spectrumTags[0].getAttribute('precursorMZ')
            if precursorMZ:
                try: self.document.spectrum.precursorMZ = float(precursorMZ)
                except ValueError: pass
            
            precursorCharge = spectrumTags[0].getAttribute('precursorCharge')
            if precursorCharge:
                try: self.document.spectrum.precursorCharge = int(precursorCharge)
                except ValueError: pass
            
            polarity = spectrumTags[0].getAttribute('polarity')
            if polarity:
                try: self.document.spectrum.polarity = int(polarity)
                except ValueError: pass
            
            # get mzArray
            mzData = None
            mzArrayTags = spectrumTags[0].getElementsByTagName('mzArray')
            if mzArrayTags:
                compression = mzArrayTags[0].getAttribute('compression')
                endian = '<'
                if mzArrayTags[0].getAttribute('endian') == 'big':
                    endian = '>'
                
                mzData = self._getNodeText(mzArrayTags[0])
                mzData = self._convertDataPoints(mzData, compression, endian)
            
            # get intArray
            intData = None
            intArrayTags = spectrumTags[0].getElementsByTagName('intArray')
            if intArrayTags:
                compression = intArrayTags[0].getAttribute('compression')
                endian = '<'
                if intArrayTags[0].getAttribute('endian') == 'big':
                    endian = '>'
                
                intData = self._getNodeText(intArrayTags[0])
                intData = self._convertDataPoints(intData, compression, endian)
            
            # check data
            if not mzData or not intData:
                return False
            
            # format data
            points = map(list, zip(mzData, intData))
            points = numpy.array(points)
            
            # add to spectrum
            self.document.spectrum.points = points
    # ----
    
    
    def handlePeaklist(self):
        """Read peaks from the first 'peaklist' element into document spectrum."""
        
        # optional peak attributes: name, converter, default value
        optional = (
            ('baseline', float, 0),
            ('sn', float, None),
            ('charge', int, None),
            ('isotope', int, None),
            ('fwhm', float, None),
        )
        
        # get peak elements
        peakTags = []
        peaklistTags = self.parsedData.getElementsByTagName('peaklist')
        if peaklistTags:
            peakTags = peaklistTags[0].getElementsByTagName('peak')
        
        # make peaks
        peaks = []
        for peakTag in peakTags:
            
            # get data, peaks with malformed values are skipped
            try:
                params = {
                    'mz': float(peakTag.getAttribute('mz')),
                    'intensity': float(peakTag.getAttribute('intensity')),
                }
                for name, convert, default in optional:
                    if peakTag.hasAttribute(name):
                        params[name] = convert(peakTag.getAttribute(name))
                    else:
                        params[name] = default
            except ValueError:
                continue
            
            peaks.append(mspy.peak(**params))
        
        # add peaklist to document
        self.document.spectrum.peaklist = mspy.peaklist(peaks)
    # ----
    
    
    def handleAnnotations(self):
        """Get annotations."""
        
        # get annotations
        annotationsTags = self.parsedData.getElementsByTagName('annotations')
        if annotationsTags:
            
            # get annotation
            annotationTags = annotationsTags[0].getElementsByTagName('annotation')
            for annotationTag in annotationTags:
                
                # get data
                try:
                    label = self._getNodeText(annotationTag)
                    mz = float(annotationTag.getAttribute('peakMZ'))
                    intensity = 0.
                    baseline = 0.
                    charge = None
                    radical = None
                    theoretical = None
                    formula = None
                    
                    if annotationTag.hasAttribute('peakIntensity'):
                        intensity = float(annotationTag.getAttribute('peakIntensity'))
                    if annotationTag.hasAttribute('peakBaseline'):
                        baseline = float(annotationTag.getAttribute('peakBaseline'))
                    if annotationTag.hasAttribute('charge'):
                        charge = int(annotationTag.getAttribute('charge'))
                    if annotationTag.hasAttribute('radical'):
                        radical = int(annotationTag.getAttribute('radical'))
                    if annotationTag.hasAttribute('calcMZ'):
                        theoretical = float(annotationTag.getAttribute('calcMZ'))
                    if annotationTag.hasAttribute('formula'):
                        formula = annotationTag.getAttribute('formula')
                    
                    annot = annotation(label=label, mz=mz, intensity=intensity, baseline=baseline, charge=charge, radical=radical, theoretical=theoretical, formula=formula)
                    
                except ValueError:
                    continue
                
                # append annotation
                self.document.annotations.append(annot)
            
            # sort annotations by mz
            self.document.sortAnnotations()
    # ----
    
    
    def handleSequences(self):
        """Get sequences."""
        
        # get sequences
        sequencesTags = self.parsedData.getElementsByTagName('sequences')
        if sequencesTags:
            sequenceTags = sequencesTags[0].getElementsByTagName('sequence')
            for sequenceTag in sequenceTags:
                sequence = self.handleSequence(sequenceTag)
                if sequence:
                    self.document.sequences.append(sequence)
    # ----
    
    
    def handleSequence(self, sequenceTag):
        """Get sequence.
        
        Makes mspy sequence from given 'sequence' element, applies its
        modifications and attaches its matches as 'matches' attribute.
        Returns the sequence or False if the sequence cannot be created.
        """
        
        # get title
        seqTitle = ''
        titleTags = sequenceTag.getElementsByTagName('title')
        if titleTags:
            seqTitle = self._getNodeText(titleTags[0])
        
        # get sequence
        chain = ''
        seqTags = sequenceTag.getElementsByTagName('seq')
        if seqTags:
            chain = self._getNodeText(seqTags[0])
        
        # make sequence
        try:
            sequence = mspy.sequence(chain, seqTitle)
            sequence.matches = []
        except Exception:
            return False
        
        # get modifications
        for modificationTag in sequenceTag.getElementsByTagName('modification'):
            name = modificationTag.getAttribute('name')
            position = modificationTag.getAttribute('position')
            gainFormula = modificationTag.getAttribute('gainFormula')
            lossFormula = modificationTag.getAttribute('lossFormula')
            
            # position is kept as text if it is not an integer
            try: position = int(position)
            except ValueError: pass
            
            # fixed unless stated otherwise
            modtype = 'f'
            if modificationTag.getAttribute('type') == 'variable':
                modtype = 'v'
            
            # unknown modifications are added to the library first
            if name in mspy.modifications or self._addModification(name, gainFormula, lossFormula):
                sequence.modify(name, position, modtype)
        
        # optional match values: match argument, XML attribute, converter
        # (None to keep the text as it is) and default value
        optional = (
            ('intensity', 'peakIntensity', float, 0.),
            ('baseline', 'peakBaseline', float, 0.),
            ('charge', 'charge', int, None),
            ('radical', 'radical', int, None),
            ('theoretical', 'calcMZ', float, None),
            ('formula', 'formula', None, None),
        )
        
        # get matches, those with malformed values are skipped
        for matchTag in sequenceTag.getElementsByTagName('match'):
            try:
                params = {
                    'label': self._getNodeText(matchTag),
                    'mz': float(matchTag.getAttribute('peakMZ')),
                }
                for argument, attribute, convert, default in optional:
                    value = default
                    if matchTag.hasAttribute(attribute):
                        value = matchTag.getAttribute(attribute)
                        if convert is not None:
                            value = convert(value)
                    params[argument] = value
                
                # range is stored as integers separated by dash
                sequenceRange = None
                if matchTag.hasAttribute('sequenceRange'):
                    sequenceRange = [int(x) for x in matchTag.getAttribute('sequenceRange').split('-')]
                
                fragmentSerie = None
                if matchTag.hasAttribute('fragmentSerie'):
                    fragmentSerie = matchTag.getAttribute('fragmentSerie')
                
                fragmentIndex = None
                if matchTag.hasAttribute('fragmentIndex'):
                    fragmentIndex = int(matchTag.getAttribute('fragmentIndex'))
                
                m = match(**params)
                m.sequenceRange = sequenceRange
                m.fragmentSerie = fragmentSerie
                m.fragmentIndex = fragmentIndex
                
                sequence.matches.append(m)
            
            except ValueError:
                continue
        
        return sequence
    # ----
    
    
    
    # OLDER VERSIONS
    
    def handlePeaklist_10(self):
        """Read peaks and their annotations from mSD version 1.0."""
        
        # get peak elements
        peakTags = []
        peaklistTags = self.parsedData.getElementsByTagName('peaklist')
        if peaklistTags:
            peakTags = peaklistTags[0].getElementsByTagName('peak')
        
        peaks = []
        for peakTag in peakTags:
            
            # get data, peaks with malformed values are skipped
            try:
                mz = float(peakTag.getAttribute('mass'))
                intensity = float(peakTag.getAttribute('intens'))
            except ValueError:
                continue
            label = peakTag.getAttribute('annots')
            
            # make peak
            peaks.append(mspy.peak(mz=mz, intensity=intensity))
            
            # make annotation, it is stored within the peak in this version
            if label:
                item = annotation(label=label, mz=mz, intensity=intensity)
                self.document.annotations.append(item)
        
        # add peaklist to document
        self.document.spectrum.peaklist = mspy.peaklist(peaks)
    # ----
    
    
    def handleSequences_10(self):
        """Get sequences from mSD version 1.0."""
        
        # get sequences
        sequencesTags = self.parsedData.getElementsByTagName('sequences')
        if sequencesTags:
            sequenceTags = sequencesTags[0].getElementsByTagName('sequence')
            for sequenceTag in sequenceTags:
                sequence = self.handleSequence_10(sequenceTag)
                if sequence:
                    self.document.sequences.append(sequence)
    # ----
    
    
    def handleSequence_10(self, sequenceTag):
        """Get sequence from mSD version 1.0.
        
        Makes mspy sequence from given 'sequence' element and applies its
        modifications. Returns the sequence (with empty 'matches' list) or
        False if the sequence cannot be created.
        """
        
        # get title
        seqTitle = ''
        titleTags = sequenceTag.getElementsByTagName('title')
        if titleTags:
            seqTitle = self._getNodeText(titleTags[0])
        
        # get sequence
        seq = ''
        seqTags = sequenceTag.getElementsByTagName('seq')
        if seqTags:
            seq = self._getNodeText(seqTags[0])
        
        # make sequence
        try:
            sequence = mspy.sequence(seq, seqTitle)
            sequence.matches = []
        except Exception:
            return False
        
        # get modifications
        for modificationTag in sequenceTag.getElementsByTagName('modification'):
            name = modificationTag.getAttribute('name')
            amino = modificationTag.getAttribute('amino')
            position = modificationTag.getAttribute('position')
            gainFormula = modificationTag.getAttribute('gain')
            lossFormula = modificationTag.getAttribute('loss')
            
            # stored position is decreased by one, 'amino' value is used
            # if no position is specified
            if position:
                position = int(position)-1
            else:
                position = amino
            
            # unknown modifications are added to the library first
            if name in mspy.modifications or self._addModification(name, gainFormula, lossFormula):
                sequence.modify(name, position)
        
        return sequence
    # ----
    
    
    
    # UTILITIES
    
    def _convertDataPoints(self, data, compression, endian):
        """Convert spectrum data points."""
        
        try:
            
            # convert from base64
            data = base64.b64decode(data)
            
            # decompress
            if compression:
                data = zlib.decompress(data)
            
            # convert form binary
            count = len(data) / struct.calcsize(endian + 'f')
            data = struct.unpack(endian + 'f' * count, data[0:len(data)])
            
            return data
        
        except:
            return False
    # ----
    
    
    def _getVersion(self):
        """Get mSD format version."""
        
        # mSD document
        mSDTags = self.parsedData.getElementsByTagName('mSD')
        if mSDTags:
            self.version = mSDTags[0].getAttribute('version')
            return
        
        # mMassDoc document
        mMassDocTags = self.parsedData.getElementsByTagName('mMassDoc')
        if mMassDocTags:
            self.version = mMassDocTags[0].getAttribute('version')
            return
    # ----
    
    
    def _getNodeText(self, node):
        """Get text from node list."""
        
        # get text
        buff = ''
        for node in node.childNodes:
            if node.nodeType == node.TEXT_NODE:
                buff += node.data
        
        # replace back some characters
        search = ('&amp;', '&quot;', '&#39;', '&lt;', '&gt;')
        replace = ('&', '"', "'", '<', '>')
        for x, item in enumerate(search):
            buff = buff.replace(item, replace[x])
        
        return buff
    # ----
    
    
    def _addModification(self, name, gainFormula, lossFormula):
        """Add modification to the library."""
        
        # check name
        if not name or name in mspy.modifications:
            return False
        
        # check gain and loss
        if not gainFormula and not lossFormula:
            return False
        try:
            formula = mspy.compound(gainFormula)
            formula = mspy.compound(lossFormula)
        except:
            return False
        
        # make modification
        modification = mspy.modification(name=name, gainFormula=gainFormula, lossFormula=lossFormula, aminoSpecifity='ACDEFGHIKLMNPQRSTVWY')
        mspy.modifications[name] = modification
        mspy.saveModifications()
        
        return True
    # ----
    
    

# REPORT
# ------

# Leading part of the HTML report: XML prolog, XHTML 1.0 Strict doctype,
# page metadata, embedded CSS and JavaScript functions used to sort table
# rows (sortTable, getTextValue, compareValues). The text ends with the
# opening <body> tag, presumably the report content is appended after it.
REPORT_HEADER = """<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="cs" lang="cs">
<head>
  <meta http-equiv="content-type" content="text/html; charset=utf-8" />
  <meta name="author" content="Created by mMass - Open Source Mass Spectrometry Tool; www.mmass.org" />
  <title>mMass Report</title>
  <style type="text/css">
  <!--
    body{margin: 5%; font-size: 8.5pt; font-family: Arial, Verdana, Geneva, Helvetica, sans-serif;}
    h1{font-size: 1.5em; text-align: center; margin: 1em 0; border-bottom: 3px double #000;}
    h1 span{font-style: italic;}
    h2{font-size: 1.2em; text-align: left; margin: 2em 0 1em 0; border-bottom: 1px solid #000;}
    h2 span{font-style: italic;}
    table{border-collapse: collapse; margin: 1.5em auto; width: 100%; background-color: #fff;}
    thead{display: table-header-group;}
    th,td{font-size: .75em; border: 1px solid #aaa; padding: .3em; vertical-align: top; text-align: left;}
    html>body th, html>body td{font-size: .9em;}
    th{text-align: center; color: #000; background-color: #ccc;}
    th a{text-align: center; color: #000; background-color: #ccc; text-decoration: none;}
    #tableMainInfo th{text-align: right; width: 20%;}
    #tableMainInfo td{text-align: left;}
    #spectrum{text-align: center;}
    #footer{font-size: .8em; font-style: italic; text-align: center; color: #aaa; margin: 2em 0 1em 0; padding-top: 0.5em; border-top: 1px solid #000;}
    .nowrap{white-space:nowrap;}
    .right{text-align: right;}
    .sequence{font-size: 1.1em; font-family: monospace;}
    .modified{color: #f00; font-weight: bold;}
    .matched{text-decoration: underline;}
  -->
  </style>
  <script type="text/javascript">
    // This script was adapted from the original script by Mike Hall (www.brainjar.com)
    //<![CDATA[
    
    // for IE
    if (document.ELEMENT_NODE == null) {
      document.ELEMENT_NODE = 1;
      document.TEXT_NODE = 3;
    }
    
    // sort table
    function sortTable(id, col) {
      
      // get table
      var tblEl = document.getElementById(id);
      
      // init sorter
      if (tblEl.reverseSort == null) {
        tblEl.reverseSort = new Array();
      }
      
      // reverse sorting
      if (col == tblEl.lastColumn) {
        tblEl.reverseSort[col] = !tblEl.reverseSort[col];
      }
      
      // remember current column
      tblEl.lastColumn = col;
      
      // sort table
      var tmpEl;
      var i, j;
      var minVal, minIdx;
      var testVal;
      var cmp;
      
      for (i = 0; i < tblEl.rows.length - 1; i++) {
        minIdx = i;
        minVal = getTextValue(tblEl.rows[i].cells[col]);
        
        // walk in other rows
        for (j = i + 1; j < tblEl.rows.length; j++) {
          testVal = getTextValue(tblEl.rows[j].cells[col]);
          cmp = compareValues(minVal, testVal);
          
          // reverse sorting
          if (tblEl.reverseSort[col]) {
            cmp = -cmp;
          }
          
          // set new minimum
          if (cmp > 0) {
            minIdx = j;
            minVal = testVal;
          }
        }
        
        // move row before
        if (minIdx > i) {
          tmpEl = tblEl.removeChild(tblEl.rows[minIdx]);
          tblEl.insertBefore(tmpEl, tblEl.rows[i]);
        }
      }
      
      return false;
    }
    
    // get node text
    function getTextValue(el) {
      var i;
      var s;
      
      // concatenate values of text nodes
      s = "";
      for (i = 0; i < el.childNodes.length; i++) {
        if (el.childNodes[i].nodeType == document.TEXT_NODE) {
          s += el.childNodes[i].nodeValue;
        } else if (el.childNodes[i].nodeType == document.ELEMENT_NODE && el.childNodes[i].tagName == "BR") {
          s += " ";
        } else {
          s += getTextValue(el.childNodes[i]);
        }
      }
      
      return s;
    }
    
    // compare values
    function compareValues(v1, v2) {
      var f1, f2;
      
      // lowercase values
      v1 = v1.toLowerCase()
      v2 = v2.toLowerCase()
      
      // try to convert values to floats
      f1 = parseFloat(v1);
      f2 = parseFloat(v2);
      if (!isNaN(f1) && !isNaN(f2)) {
        v1 = f1;
        v2 = f2;
      }
      
      // compare values
      if (v1 == v2) {
        return 0;
      } else if (v1 > v2) {
        return 1;
      } else {
        return -1;
      }
    }
    
    //]]>
  </script>
</head>

<body>
"""