001package org.biopax.paxtools.controller;
002
003import org.apache.commons.logging.Log;
004import org.apache.commons.logging.LogFactory;
005import org.biopax.paxtools.model.BioPAXElement;
006import org.biopax.paxtools.model.Model;
007import org.biopax.paxtools.model.level2.*;
008
009import java.util.*;
010
011
012/**
013 *
014 * This class is intended to merge and to integrate biopax models
015 * not necessarily from the same resource - if models allow such a
016 * thing. This class has very similar functionality to the controller.Merger
017 * but it differs in means of merging/integrating methodology.
018 *
 * Integrator iterates over all the conversions in the <em>target</em> and
 * <em>source</em> model(s), and assigns scores indicating their similarity.
021 * After the scoring process is completed, it then starts integrating conversions
022 * having the highest score until it reaches the <em>threshold</em> value. After
023 * this conversion based integration is accomplished, all the models are merged
024 * into the <em>target</em>.
025 *
026 * Please note that this class is in its beta state.
027 */
028public class Integrator {
029
030    private static final Log log = LogFactory.getLog(Integrator.class);
031    private EditorMap editorMap;
032    private Merger merger;
033    private Model target, mergedSources = null;
034
035    private boolean onlyMapping = false;
036    private boolean selfRemove  = false;
037    private boolean normalizeModels = false;
038
039    /**
040     *  This is the main score matrix
041     *
042     *      |   D   |   E   |   F   |
043     * ------------------------------
044     * A    |       |       |       |
045     * ------------------------------
046     * B    |       |       |       |
047     * ------------------------------
048     * C    |       |       |       |
049     * ------------------------------
050     */
051    private Map<physicalEntityParticipant,
052                Map<physicalEntityParticipant, Double>> pepScoreMatrix
053                    = new HashMap<physicalEntityParticipant,
054                        Map<physicalEntityParticipant, Double>>();
055
056    /**
057     * This is the pool where the scores and relevant conversions
058     * will be stored. Other than this global one, there will be
059     * a local copy to enable the user handle different threshold
060     * values one at a time.
061     */
062    private List<ConversionScore> similarConversions;
063
064    private final String[][] dbChanges
065            =  {
066                {"Chemical Entities of Biological Interest", "ChEBI"}
067               };
068
069    private Set<Set<String>> relatedTerms = new HashSet<Set<String>>();
070    private String[][] termLists =
071            {
072                    {"active", "active1", "active2", "phosphorylation", "phosphate group", "phosphorylation site"},
073                    {"inactive", "phosphorylation", "phosphate group", "phosphorylation site"}
074
075            };
076
077    private String[][] locLists =
078                {
079                        {"cytoplasm", "cytosol"}
080                };
081
082    /* Globalling tricks & fine-tuning */
083    private final double SIZE_MISMATCH_PENALTY = 0.7;
084    private final double BASE_SCORE = 0.4;
085    private double SCORES_OVER = 100.0;
086    private final double MAX_PEP_SCORE = 3.5;
087
088    private final double STATS_OVER = 1000.0; // For info messages like "2/100 completed"
089
090    private double threshold = SCORES_OVER; // Max. threshold
091
092
093    /**
094     *
095     * @param editorMap map to be used in order to initialize merger
096     * @param target target model into which integration will be done
097     * @param sources targets that are going to be integrated into target
098     *
099     * @see org.biopax.paxtools.controller.Merger
100     */
101    public Integrator(EditorMap editorMap, Model target, Model... sources) {
102        this.editorMap = editorMap;
103        this.merger = new Merger(editorMap);
104        this.target = target;
105
106        log.info(sources.length + " source model(s) will be merged.");
107        // Merge all "sources" into one single model
108        for(Model source : sources) {
109            if( mergedSources == null )
110                mergedSources = source;
111            else
112                merger.merge(mergedSources, source);
113        }
114        log.info("Merging finished.");
115
116        if( isNormalizeModels() ) {
117            log.info("Normalizing models.");
118
119            log.info("Normaling XREFs.");
120            normalizeXrefs(target);
121            normalizeXrefs(mergedSources);
122            log.info("Normaling OCVs.");
123            normalizeOpenControlledVocabulary(mergedSources);
124            log.info("Normaling cellular locations.");
125            normalizeCellularLocations(mergedSources);
126
127            log.info("Normalization completed.");
128        }
129    }
130
131    /**
132     * Sets the threshold value (the smallest score for integrating
133     * two conversions)
134     *
135     * @param threshold value
136     *
137     * @see #setScoresOver(double)
138     */
139    public void setThreshold(Double threshold) {
140        this.threshold = threshold;
141    }
142
143    /**
144     * Returns the threshold value (the smallest score for integrating
145     * two conversions)
146     *
147     * @return a double value (default: 100.0)
148     */
149    public Double getThreshold() {
150        return threshold;
151    }
152
153    /**
154     * Enables/disables integration. If <em>only mapping</em> feature is
155     * set to true, integrator will only assign scores to conversion and
156     * exits. This option may help to build interactive programs.
157     *
158     * @param mapping true for skipping integration
159     *
160     * @see #integrate()
161     */
162    public void setOnlyMapping(boolean mapping) {
163        this.onlyMapping = mapping;
164    }
165
166    /**
167     *
168     * @return true for enabled "only mapping", false otherwise
169     *
170     * @see #setOnlyMapping(boolean)
171     */
172    boolean isOnlyMapping() {
173        return onlyMapping;
174    }
175
176    /**
177     * Enables removal of elements from the <em>target</em> if they are contained
178     * both in source and target, and have a match with another conversion. Useful for
179     * integrating of a model by itself. Default is false.
180     *
181     * @param selfRemove true for enabling removal, false otherwise
182     */
183    public void setSelfRemove(boolean selfRemove) {
184        this.selfRemove = selfRemove;
185    }
186
187    /**
188     *
189     * @return true for enabled removal, false otherwise
190     *
191     * @see #setSelfRemove(boolean)
192     */
193    boolean isSelfRemove() {
194        return selfRemove;
195    }
196
197    /**
198     * Fixes some of the known Open Controlled Vocabullary issues in the models.
199     * It is best to try integration with this option enabled (true) and
200     * disabled (false) to see which gives a better result. Default is false.
201     *
202     * @param normalizeModels true for normalization of OCVs
203     */
204    public void setNormalizeModels(boolean normalizeModels) {
205        this.normalizeModels = normalizeModels;
206    }
207
208    /**
209     *
210     * @return true for normalization of OCVs, false otherwise (default)
211     *
212     * @see #setNormalizeModels(boolean)
213     */
214    boolean isNormalizeModels() {
215        return normalizeModels;
216    }
217
218
219    /**
220     * @see #setScoresOver(double)
221     *
222     * @return a double indicating maximum score
223     */
224    double getScoresOver() {
225        return SCORES_OVER;
226    }
227
228    /**
229     * A score between two conversions is in the interval (0, 1].
230     * Setting a <em>scoresOver</em> value will the map this range to
231     * (0, scoresOver]. Default value is 100.0, so the default score
232     * range is (0,100]. This setting does not alter the integration
233     * process. It only multiplies the scores with the given value.
234     *
235     * @param scoresOver a double score
236     */
237    public void setScoresOver(double scoresOver) {
238        this.SCORES_OVER = scoresOver;
239    }
240
241    /**
242         * Integrates <em>target</em> and <em>source</em>(s) and returns a
243         * sorted (desc) list of conversion scores.
244         *
245         * @see #setNormalizeModels(boolean)
246         * @see #setOnlyMapping(boolean)
247         * @see #setScoresOver(double)
248         * @see #setSelfRemove(boolean)
249         * @see #setThreshold(Double)
250         *
251         * @return a sorted list of ConversionScores
252         */
253    public List<ConversionScore> integrate() {
254        return integrate(null);
255    }
256
257    /**
258     * Does the integration using user-provided scores list.
259     *
260     * @see #integrate()
261     *
262     * @param alternativeScores alternative scores, can be null
263     * @return a sorted list of ConversionScores
264     */
265   public List<ConversionScore> integrate(List<ConversionScore> alternativeScores) {
266        Map<physicalEntityParticipant, Map<physicalEntityParticipant, Double>>
267                    pepScoreMatrix = this.pepScoreMatrix;
268        List<ConversionScore> similarConversions;
269        // There is something wrong with the sources, just quit
270        if(mergedSources == null) {
271            log.warn("Either target or source is empty, skipping integration.");
272            return null;
273        }
274
275        log.info("Scoring all the PEPs.");
276        /* If it is a first run, we need to calculate all scores,
277         * but if it is not, we can save some CPU time.
278         */
279        if( pepScoreMatrix.isEmpty() ) { // first run
280            createPEPScoreMatrix(target.getObjects(physicalEntityParticipant.class),
281                                     mergedSources.getObjects(physicalEntityParticipant.class));
282
283            log.info("Scoring PEPs finished.");
284
285            log.info("Scoring conversions");
286            this.similarConversions = createConversionScoreMap(pepScoreMatrix,
287                                                            target.getObjects(conversion.class),
288                                                            mergedSources.getObjects(conversion.class));
289            log.info("Scoring conversions finished.");
290        }
291
292        if(this.similarConversions == null)
293                this.similarConversions = new ArrayList<ConversionScore>();
294
295        /* Original score matrixes won't be modified for a later use
296         * Instead, we are going to copy them, and modify their copies.
297         */
298        log.info("Creating a copy of the PEP scores.");
299        Map<physicalEntityParticipant,
300            Map<physicalEntityParticipant, Double>> copyMatrix
301                            = new HashMap<physicalEntityParticipant, Map<physicalEntityParticipant, Double>>();
302            // Copy the contents of the matrix
303        for(physicalEntityParticipant pepKey: pepScoreMatrix.keySet()) {
304            copyMatrix.put(pepKey,
305                    new HashMap<physicalEntityParticipant, Double>(pepScoreMatrix.get(pepKey)));
306        }
307        // We want to use the copy now
308        pepScoreMatrix = copyMatrix;
309        log.info("PEP scores copied.");
310
311        similarConversions = (alternativeScores == null)
312                                ? new ArrayList<ConversionScore>(this.similarConversions)
313                                : alternativeScores;
314
315        log.info("Conversion scores copied.");
316        /* End of copies */
317
318        log.info("Mapping conversions/PEPs with a threshold: " + getThreshold());
319        mapConversions(similarConversions, pepScoreMatrix);
320        log.info("Mapping finished.");
321
322        // Sorting is essential for #equalizeEntities. If you are to
323        // modify this sort, check there also!
324        log.info("Sorting scores (" + similarConversions.size() + " scores).");
325        Collections.sort(similarConversions);
326        Collections.reverse(similarConversions);
327        log.info("Sorting finished.");
328
329        if( isOnlyMapping() ) {
330            log.info("Skipping model integration.");
331        } else {
332            log.info("Entities of similar conversions are being eqalized.");
333            equalizeEntities(similarConversions);
334
335            log.info("Merging integrated models.");
336            merger.merge(target, mergedSources);
337            log.info("Merging finished.");
338        }
339
340        log.info("Integration completed.");
341        return similarConversions;
342    }
343
344    private void equalizeEntities(List<ConversionScore> similarConversions) {
345        Set<conversion> doNotModifySet = new HashSet<conversion>();
346        Set<ConversionScore> containsSelfRemoved = new HashSet<ConversionScore>();
347
348        for(ConversionScore convScore: similarConversions) {
349            // Since we sorted the list, we are safe to break
350            // But a continue will also do the trick, mostly
351            // requiring little more time
352            if( convScore.getScore() < getThreshold() )
353                break;
354
355            conversion conv1 = convScore.getConversion1(),
356                       conv2 = convScore.getConversion2();
357            // If they are already the same, pass
358            if( conv1.getRDFId().equals(conv2.getRDFId()) )
359                continue;
360
361            // Do not modify it twice
362            if( doNotModifySet.contains(conv2) ) {
363                log.info(conv2.getRDFId() + " has already been modified. Skipped.");
364                continue;
365            }
366
367            // Self remove operations
368            if( isSelfRemove() ) {
369                // Remove "conv2" from target, if the corresponding flag is set true
370                BioPAXElement eqBPE = target.getByID(conv2.getRDFId());
371                if( eqBPE != null ) {
372                    target.remove( eqBPE );
373                    log.info("Self removing: " + eqBPE.getRDFId());
374
375                    // Collect other matches of will-be-removed element.
376                    for(ConversionScore tempCS: similarConversions) {
377                        if( tempCS.getConversion1().equals(eqBPE) )
378                            containsSelfRemoved.add(tempCS);
379                    }
380                } else if( containsSelfRemoved.contains(convScore) )
381                        continue;
382            }
383
384            // Three things to make equal: conversions themselves, matched PEPs, their controls
385            equalize(conv1, conv2);
386            if( convScore.isReverseMatch() )
387                changeDirection(conv2);
388
389            for(physicalEntityParticipant pep1: convScore.getMatchedPEPs() ) {
390                physicalEntityParticipant pep2 = convScore.getMatch(pep1);
391                // We got the match, now set lets build sets of PEPs of equal states
392                equalizePEP(pep1, pep2);
393            } // End of score maximazing
394
395            for( control control1: conv1.isCONTROLLEDOf() ) {
396                for( control control2: conv2.isCONTROLLEDOf() ) {
397                    boolean allSimilar = true;
398                    for(physicalEntityParticipant controller1: control1.getCONTROLLER() ) {
399                        for(physicalEntityParticipant controller2: control2.getCONTROLLER() )  {
400                            if( getScore(controller1, controller2) > BASE_SCORE ) {
401                                equalizePEP(controller1, controller2);
402                            } else {
403                                allSimilar = false;
404                            }
405                        }
406                    }
407                    if( allSimilar // size 0 causes false equivalance, thus regard that case
408                            && !(control1.getCONTROLLER().size() == 0 ^ control2.getCONTROLLER().size() == 0))
409                    {
410                        equalize(control1, control2);
411
412                        if( convScore.isReverseMatch() && control2 instanceof catalysis)
413                            changeDirection((catalysis) control2);
414                    }
415                }
416            }
417
418            // We are done with conv2
419            doNotModifySet.add(conv2);
420        }
421    }
422
423    /**
424     * @deprecated setRDFId is not available anymore!
425     */
426    private void equalize(BioPAXElement e1, BioPAXElement e2) {
427        // Operation below is enough for the time being
428        // TODO re-factoring: setRDFId is not available anymore! (We don't really want to change rdfIDs, do we?..)
429        //e2.setRDFId(e1.getRDFId());
430
431        throw new UnsupportedOperationException("This needs re-factoring: bpe.setRDFId is not available anymore!");
432
433        //TODO ? use some alternative way to store that a1 equals e2, e.g., Set<String> matched,
434        //matched.add(e1.getRDFId()+e2.getRDFId()); matched.add(e2.getRDFId()+e1.getRDFId());
435    }
436
437    private boolean equals(BioPAXElement a, BioPAXElement b) {
438        throw new UnsupportedOperationException("not implemented yet.");
439        // TODO ? implement equals(BioPAXElement a, BioPAXElement b): can be smth. like the following... and use below
440        //return (a == null) ? b == null : a.equals(b) || matched.contains(a.getRDFId()+b.getRDFId());
441    }
442
443    private void equalizePEP(physicalEntityParticipant controller1, physicalEntityParticipant controller2) {
444        // There is a special case for PEPs: we also need to update equivalent PEPs' fields
445        Set<physicalEntityParticipant> tempEqvPeps = new HashSet<physicalEntityParticipant>();
446        tempEqvPeps.addAll(getEquivalentsOfPEP(controller1));
447        tempEqvPeps.addAll(getEquivalentsOfPEP(controller2));
448        for(physicalEntityParticipant eqPep : tempEqvPeps)
449            updatePepFields(eqPep, controller2);
450
451        for(physicalEntityParticipant eqPep : tempEqvPeps)
452            updatePepFields(controller2, eqPep);
453
454        equalize(controller1, controller2);
455    }
456
457    private Set<physicalEntityParticipant> getEquivalentsOfPEP(physicalEntityParticipant onePep) {
458        Set<physicalEntityParticipant> eqGrp = new HashSet<physicalEntityParticipant>();
459        for(physicalEntityParticipant aPep : onePep.getPHYSICAL_ENTITY().isPHYSICAL_ENTITYof() ) {
460            if(aPep.isInEquivalentState(onePep))
461                eqGrp.add(aPep);
462        }
463
464        return eqGrp;
465    }
466
467    private void changeDirection(conversion conv) {
468        SpontaneousType st = conv.getSPONTANEOUS();
469
470        /* One possibility is below, but no need to operate
471        if( st == ConversionDirectionType.NOT_SPONTANEOUS || st == null )
472            return;
473        */
474        if( st == SpontaneousType.L_R )
475            conv.setSPONTANEOUS(SpontaneousType.R_L);
476        else if( st == SpontaneousType.R_L )
477            conv.setSPONTANEOUS(SpontaneousType.L_R);
478    }
479
480    private void changeDirection(catalysis cat) {
481        Direction ct = cat.getDIRECTION();
482
483        if( ct == Direction.IRREVERSIBLE_LEFT_TO_RIGHT )
484            cat.setDIRECTION(Direction.IRREVERSIBLE_RIGHT_TO_LEFT);
485        else if( ct == Direction.IRREVERSIBLE_RIGHT_TO_LEFT )
486            cat.setDIRECTION(Direction.IRREVERSIBLE_LEFT_TO_RIGHT);
487        else if( ct == Direction.PHYSIOL_LEFT_TO_RIGHT )
488            cat.setDIRECTION(Direction.PHYSIOL_RIGHT_TO_LEFT);
489        else if( ct == Direction.PHYSIOL_RIGHT_TO_LEFT)
490            cat.setDIRECTION(Direction.PHYSIOL_LEFT_TO_RIGHT);
491
492        /* One possibility is below, but no need to operate
493        else if( ct == CatalysisDirection.REVERSIBLE)
494            return;
495        */
496    }
497
498    private void mapConversions(Collection<ConversionScore> similarConversions,
499                                Map<physicalEntityParticipant,
500                                        Map<physicalEntityParticipant, Double>> pepScoreMatrix) {
501        // To get rid of Concurrent modification :|
502        Set<ConversionScore> toBeUpdated = new HashSet<ConversionScore>();
503
504        for(ConversionScore convScore : similarConversions) {
505            // Check if the score is equal to or higher than the threshold
506            if( convScore.getScore() < getThreshold() )
507                continue;
508
509            // Get matches of PEPs of first conversion
510            for(physicalEntityParticipant pep1: convScore.getMatchedPEPs() ) {
511                physicalEntityParticipant pep2 = convScore.getMatch(pep1);
512
513                // We got the match, now set their score to max
514                pepScoreMatrix.get(pep1).put(pep2, MAX_PEP_SCORE);
515            } // End of score maximazing
516
517            // Remember this
518            toBeUpdated.add(convScore);
519        }
520
521        // Now we know which scores are affected, lets replace them
522        for(ConversionScore convScore: toBeUpdated) {
523            // Remove it from similarConversion
524            similarConversions.remove(convScore);
525
526            // Add new score
527            similarConversions.add( getScore(pepScoreMatrix,
528                                        convScore.getConversion1(),
529                                        convScore.getConversion2()) );
530        }
531
532    }
533
534    private List<ConversionScore> createConversionScoreMap(Map<physicalEntityParticipant,
535                                                Map<physicalEntityParticipant, Double>> pepScoreMatrix,
536                                                           Set<conversion> convSet1, Set<conversion> convSet2) {
537        List<ConversionScore> similarConversions = new ArrayList<ConversionScore>();
538
539        double totalSize = convSet1.size() * convSet2.size();
540        double convCnt = 0;
541
542        for(conversion conv1: convSet1) {
543            for(conversion conv2: convSet2) {
544                // No need to compare conversions of different types
545                if( !((conv1 instanceof biochemicalReaction && conv2 instanceof biochemicalReaction)
546                   || (conv1 instanceof complexAssembly && conv2 instanceof complexAssembly)
547                   || (conv1 instanceof transport && conv2 instanceof transport)) )
548                {
549                    convCnt++;
550                    continue;
551                }
552
553                if( conv1.getRDFId().equals(conv2.getRDFId())) { // If they are the same
554                    convCnt++;
555                    continue;
556                }
557
558                ConversionScore convScore = getScore(pepScoreMatrix, conv1, conv2);
559                similarConversions.add(convScore);
560
561                if( convCnt % Math.ceil(totalSize/STATS_OVER) == 0 ) {
562                    log.info( " - " + (convCnt / Math.ceil(totalSize/STATS_OVER))
563                                    + "/" + STATS_OVER + " completed.");
564                }
565
566                convCnt++;
567
568            }
569        }
570
571        return similarConversions;
572    }
573
574    private void createPEPScoreMatrix(Collection<physicalEntityParticipant> pepSet1,
575                                      Collection<physicalEntityParticipant> pepSet2) {
576
577        // If it is not empty, no need to calculate it again
578        assert pepScoreMatrix.isEmpty();
579        double totalSize = pepSet1.size() * pepSet2.size();
580
581        double pepCnt = 0;
582        for(physicalEntityParticipant pep1 : pepSet1) {
583            // Create a new row for a PEP
584            Map<physicalEntityParticipant, Double> pep1Row
585                = new HashMap<physicalEntityParticipant, Double>();
586            pepScoreMatrix.put(pep1, pep1Row);
587
588            // Fill the row with the corresponding scores
589            for(physicalEntityParticipant pep2 : pepSet2) {
590                if( complexScoreHelper(pep1.getPHYSICAL_ENTITY(),
591                        pep2.getPHYSICAL_ENTITY()) ) {
592                    Double score = getScore(pep1, pep2);
593                    pep1Row.put(pep2, score);
594                }
595
596                if( pepCnt % Math.ceil(totalSize/STATS_OVER) == 0 ) {
597                    log.info( " - " + (pepCnt / Math.ceil(totalSize/STATS_OVER))
598                                    + "/" + STATS_OVER + " completed.");
599                }
600
601                pepCnt++;
602            }
603        }
604
605    }
606
607    private boolean complexScoreHelper(physicalEntity cPe, physicalEntity pe) {
608        if(cPe instanceof complex && pe instanceof complex) {
609            for(physicalEntityParticipant tmpPep : ((complex) cPe) .getCOMPONENTS() ) {
610                if( !complexScoreHelper(pe, tmpPep.getPHYSICAL_ENTITY()) )
611                    return false;
612            }
613            return true;
614        } else if( cPe instanceof complex ) {
615            for(physicalEntityParticipant tmpPep : ((complex) cPe) .getCOMPONENTS() ) {
616                if( complexScoreHelper(tmpPep.getPHYSICAL_ENTITY(), pe) )
617                    return true;
618            }
619            return false;
620        } else {
621            return cPe.equals(pe);
622        }
623    }
624
625    private Double getScore(physicalEntityParticipant pep1,
626                                physicalEntityParticipant pep2) {
627        double totalScore = .0;
628
629        if((pep1 instanceof sequenceParticipant ^ pep2 instanceof sequenceParticipant)
630            && !(pep1.getPHYSICAL_ENTITY() instanceof smallMolecule
631                    && pep2.getPHYSICAL_ENTITY() instanceof smallMolecule) )
632            return BASE_SCORE;
633
634        if( pep1.getPHYSICAL_ENTITY().equals(pep2.getPHYSICAL_ENTITY()) )
635            totalScore += 2.5;
636        else if( complexScoreHelper(pep1.getPHYSICAL_ENTITY(), pep2.getPHYSICAL_ENTITY())
637              && complexScoreHelper(pep2.getPHYSICAL_ENTITY(), pep1.getPHYSICAL_ENTITY()) )
638            totalScore += 2.35;
639        else if( complexScoreHelper(pep1.getPHYSICAL_ENTITY(), pep2.getPHYSICAL_ENTITY())
640              || complexScoreHelper(pep2.getPHYSICAL_ENTITY(), pep1.getPHYSICAL_ENTITY()) )
641            totalScore += 2;
642        else
643            return BASE_SCORE;
644
645        if( pep1.isInEquivalentState(pep2) )
646            totalScore += 1;
647        else {
648             if( isSeqParTermsSimilar(pep1, pep2) )
649                totalScore += .8;
650             else if( isCellularLocsSimilar(pep1, pep2) )
651                totalScore += .8;
652        }
653
654        return totalScore;
655    }
656
657    private boolean isCellularLocsTermsSimilar(Set<String> fTerms, Set<String> sTerms) {
658        for( String[] locList : locLists )
659            for( String fterm : fTerms )
660                for( String sterm : sTerms )
661                    if( Arrays.asList(locList).contains(fterm) && Arrays.asList(locList).contains(sterm))
662                        return true;
663
664        return false;
665    }
666
667    private boolean isCellularLocsSimilar(physicalEntityParticipant fPep,
668                                          physicalEntityParticipant sPep) {
669        return !(fPep.getCELLULAR_LOCATION() != null && sPep.getCELLULAR_LOCATION() != null)
670               || isCellularLocsTermsSimilar(fPep.getCELLULAR_LOCATION().getTERM(),
671                                             sPep.getCELLULAR_LOCATION().getTERM());
672    }
673
674    private boolean isSeqParTermsSimilar(physicalEntityParticipant fPep,
675                                            physicalEntityParticipant sPep) {
676        if( relatedTerms.isEmpty() ) {
677            for( String[] termL : termLists )  {
678                Set<String> termSet = new HashSet<String>();
679                termSet.addAll(Arrays.asList(termL));
680                relatedTerms.add(termSet);
681            }
682        }
683
684        if( fPep instanceof sequenceParticipant
685                && sPep instanceof sequenceParticipant ) {
686            for( sequenceFeature fsf : ((sequenceParticipant) fPep).getSEQUENCE_FEATURE_LIST() )
687                for( sequenceFeature ssf : ((sequenceParticipant) sPep).getSEQUENCE_FEATURE_LIST() )
688                    for(Set<String> similarTerm : relatedTerms)
689                        if( fsf.getFEATURE_TYPE() != null && ssf.getFEATURE_TYPE() != null)
690                            for( String fterm : fsf.getFEATURE_TYPE().getTERM() )
691                                for( String sterm : ssf.getFEATURE_TYPE().getTERM() )
692                                    if( similarTerm.contains(fterm) && similarTerm.contains(sterm))
693                                        return true;
694        }
695
696        return false;
697    }
698
699    private PEPScore getScore(Map<physicalEntityParticipant,
700                                                Map<physicalEntityParticipant, Double>> pepScoreMatrix,
701                              Set<physicalEntityParticipant> PEPs1, Set<physicalEntityParticipant> PEPs2) {
702        Double finalScore = 1.0;
703
704        // This is the 1-to-1 mapping of the PEPs
705        // PEPs1 -> PEPs2
706        Map<physicalEntityParticipant, physicalEntityParticipant> pepMap
707                = new HashMap<physicalEntityParticipant, physicalEntityParticipant>();
708
709        /*
710         * If the second set is smaller than the first one,
711         * then because of the scoring algorithm, the matix
712         * should be used transposed.
713         */
714        boolean transposeMatrix;
715        Set<physicalEntityParticipant> firstSet, secondSet;
716        int minSize, sizeDiff;
717
718        if( PEPs2.size() > PEPs1.size() ) {
719            transposeMatrix = false;
720            firstSet = PEPs1;
721            secondSet = PEPs2;
722        } else {
723            transposeMatrix = true;
724            firstSet = PEPs2;
725            secondSet = PEPs1;
726        }
727
728        sizeDiff = secondSet.size() - firstSet.size();
729        // Extra penalty for one-side-conversions (e.g. ubiquination)
730        minSize = firstSet.size() == 0 ? secondSet.size() : firstSet.size();
731
732        for(physicalEntityParticipant pep1 : firstSet) {
733            // We're gonna fill the set with scores, and get the maximum
734            Map<Double, physicalEntityParticipant> scoreSet
735                    = new HashMap<Double, physicalEntityParticipant>();
736
737            for(physicalEntityParticipant pep2 : secondSet) {
738                Double pepScore;
739                if( (transposeMatrix
740                        ? complexScoreHelper(pep2.getPHYSICAL_ENTITY(), pep1.getPHYSICAL_ENTITY())
741                        : complexScoreHelper(pep1.getPHYSICAL_ENTITY(), pep2.getPHYSICAL_ENTITY())) ) {
742                   pepScore = (transposeMatrix
743                                            ? pepScoreMatrix.get(pep2).get(pep1)
744                                            : pepScoreMatrix.get(pep1).get(pep2)
745                                      );
746                } else {
747                    pepScore = this.BASE_SCORE;
748                }
749
750                scoreSet.put(pepScore, pep2);
751            }
752
753            // We have the scores, let's get the maximum
754            Double maxScore = Collections.max(scoreSet.keySet());
755
756            // We know the best match, multiply its score with the finalScore
757            finalScore *= maxScore;
758
759            // Check for transposed matrix
760            if(transposeMatrix)
761                pepMap.put(scoreSet.get(maxScore), pep1);
762            else
763                pepMap.put(pep1, scoreSet.get(maxScore));
764        }
765
766        // Here comes the last edit to final score
767        finalScore = (finalScore / Math.pow(MAX_PEP_SCORE, minSize))        // Rate actual score over max.
768                                * Math.pow(SIZE_MISMATCH_PENALTY, sizeDiff); // Give penalty for size mismatches
769
770        return new PEPScore(finalScore, pepMap);
771    }
772
773    private ConversionScore getScore(Map<physicalEntityParticipant,
774                                                Map<physicalEntityParticipant, Double>> pepScoreMatrix,
775                                     conversion conv1, conversion conv2) {
776        boolean reverseMatch;
777        Double score;
778        Map<physicalEntityParticipant, physicalEntityParticipant> pepMap
779            = new HashMap<physicalEntityParticipant, physicalEntityParticipant>();
780
781        // left-to-left, right-to-right, left-to-right, right-to-left
782        PEPScore l_l, r_r, l_r, r_l;
783
784        /* Two possiblity for a match, check for them and get the best match */
785
786        // 1# left->left , right->right (aka "straight")
787        l_l = getScore(pepScoreMatrix, conv1.getLEFT(), conv2.getLEFT());
788        r_r = getScore(pepScoreMatrix, conv1.getRIGHT(), conv2.getRIGHT());
789        Double straightScore = l_l.getScore() * r_r.getScore();
790
791        // 2# left->right , right->left (aka "reverse")
792        l_r = getScore(pepScoreMatrix, conv1.getLEFT(), conv2.getRIGHT());
793        r_l = getScore(pepScoreMatrix, conv1.getRIGHT(), conv2.getLEFT());
794        Double reverseScore = l_r.getScore() * r_l.getScore();
795
796        /* */
797
798        if(straightScore >= reverseScore) { // Straight match
799            reverseMatch = false;
800            score = straightScore;
801            pepMap.putAll(l_l.getPEPMap());
802            pepMap.putAll(r_r.getPEPMap());
803        } else { // Reverse match
804            reverseMatch = true;
805            score = reverseScore;
806            pepMap.putAll(l_r.getPEPMap());
807            pepMap.putAll(r_l.getPEPMap());
808        }
809        score *= getScoresOver(); // (0,1] -> (0, Scores Over]
810        return new ConversionScore(conv1, conv2, score, pepMap, reverseMatch);
811    }
812
813
/* The update functions below are adapted to perform the editor-based
   modifications required on the PEPs.
  */
817    private void updatePepFields(physicalEntityParticipant update,
818                                    physicalEntityParticipant existing) {
819        if( !(update instanceof sequenceParticipant ^ existing instanceof sequenceParticipant) )
820            updateObjectFields(update, existing);
821    }
822
823    private void updateObjectFields(BioPAXElement update, BioPAXElement existing) {
824        Set<PropertyEditor> editors =  editorMap.getEditorsOf(update);
825
826        for (PropertyEditor editor : editors) {
827            if ( !editor.getProperty().equals("PHYSICAL-ENTITY") ) {
828                updateObjectFieldsForEditor(editor, update, existing);
829            }
830
831        }
832    }
833
834    private void updateObjectFieldsForEditor(PropertyEditor editor,
835                                                    BioPAXElement update,
836                                                    BioPAXElement existing) {
837
838                        for (Object updateValue : editor.getValueFromBean(update)) {
839                boolean notDuplicate = true;
840
841                try {
842                    if( updateValue instanceof BioPAXElement ) {
843                        for (Object existingValue : editor.getValueFromBean(existing)) {
844                            if( ((BioPAXElement) existingValue).isEquivalent((BioPAXElement) updateValue) ) {
845                                notDuplicate = false;
846                                break;
847                            }
848                        }
849                    }
850                } catch (IllegalArgumentException e) {
851                    log.info("Empty property on bean, skipping...");
852                }
853
854                if( notDuplicate )
855                    updateField(editor, updateValue, existing);
856                        }
857
858        }
859
860    private void updateField(PropertyEditor editor, Object updateValue,
861                           BioPAXElement existing) {
862                editor.setValueToBean(updateValue, existing); //TODO:TEST
863        }
864
865    /* End of update functions */
866
/* The methods below are a temporary, manual normalization for the time being */
868    private void normalizeXrefs(Model model) {
869        for(xref oneXref : model.getObjects(xref.class) ) {
870            for( String[] dbChange : dbChanges ) {
871                if( oneXref.getDB() != null )
872                    oneXref.setDB(oneXref.getDB().replace(dbChange[0], dbChange[1]));
873            }
874        }
875    }
876
877    private void normalizeOpenControlledVocabulary(Model model) {
878        for(openControlledVocabulary ocv1: target.getObjects(openControlledVocabulary.class)) {
879            for(openControlledVocabulary ocv2: model.getObjects(openControlledVocabulary.class)) {
880                if( isOCVsSemanticallyEquivalent(ocv1, ocv2) ) {
881                    equalize(ocv1, ocv2);
882                }
883            }
884        }
885        for(openControlledVocabulary ocv1: model.getObjects(openControlledVocabulary.class)) {
886            for(openControlledVocabulary ocv2: model.getObjects(openControlledVocabulary.class)) {
887                if( isOCVsSemanticallyEquivalent(ocv1, ocv2) ) {
888                    equalize(ocv1, ocv2);
889                }
890            }
891        }
892    }
893
894    private boolean isOCVsSemanticallyEquivalent(openControlledVocabulary ocv1, openControlledVocabulary ocv2) {
895        return ocv1.equals(ocv2) ||
896                   ( (ocv1.getXREF().isEmpty() || ocv2.getXREF().isEmpty())
897                                    ? OCVsHaveCommonTerm(ocv1, ocv2)
898                                    : (!ocv1.findCommonUnifications(ocv2).isEmpty()
899                                            || OCVsHaveCommonTerm(ocv1, ocv2)) );
900    }
901
902    private boolean OCVsHaveCommonTerm(openControlledVocabulary ocv1, openControlledVocabulary ocv2) {
903        for (String s : ocv1.getTERM()) {
904            if (ocv2.getTERM().contains(s)) {
905                return true;
906            }
907        }
908        return false;
909    }
910
911    private void normalizeCellularLocations(Model model) {
912        openControlledVocabulary mostlyUsed = null;
913        Integer maxOccurence = 0;
914
915        Map<openControlledVocabulary, Integer> termCounter
916                = new HashMap<openControlledVocabulary, Integer>();
917        for(BioPAXElement pep : target.getObjects(physicalEntityParticipant.class)) {
918            openControlledVocabulary ov
919                            = ((physicalEntityParticipant) pep).getCELLULAR_LOCATION();
920            if( ov == null )
921                continue;
922
923            Integer cnt = termCounter.get(ov);
924            if( cnt == null ) {
925                cnt = 0;
926                termCounter.put(ov, cnt);
927            }
928
929            cnt += 1;
930
931            if( cnt > maxOccurence )
932                mostlyUsed = ov;
933        }
934
935        if( mostlyUsed == null )
936            return;
937
938        ArrayList <physicalEntityParticipant> pepList = new ArrayList<physicalEntityParticipant>();
939        pepList.addAll( model.getObjects(physicalEntityParticipant.class) );
940
941        for(BioPAXElement pep : pepList) {
942            openControlledVocabulary ov
943                    = ((physicalEntityParticipant) pep).getCELLULAR_LOCATION();
944
945            if( ov == null ) {
946                if( model.getByID(mostlyUsed.getRDFId()) == null ) {
947                    ov = model.addNew(openControlledVocabulary.class, mostlyUsed.getRDFId());
948                    ov.setCOMMENT( mostlyUsed.getCOMMENT() );
949                    ov.setTERM( mostlyUsed.getTERM() );
950                    ov.setXREF( mostlyUsed.getXREF() );
951                } else {
952                    ov = (openControlledVocabulary) model.getByID(mostlyUsed.getRDFId());
953                }
954
955                ((physicalEntityParticipant) pep).setCELLULAR_LOCATION(ov);
956
957            } else if ( ov.getTERM().isEmpty() ) {
958                ov.setTERM( mostlyUsed.getTERM() );
959            } else if (isCellularLocsTermsSimilar(ov.getTERM(), mostlyUsed.getTERM())) {
960                ov.setTERM( mostlyUsed.getTERM() );
961                ov.setXREF( mostlyUsed.getXREF() );
962            }
963        }
964    }
965
966    /* End of normalization methods */
967
968}
969
970/**
971 * An encapsulation of the score and pep map
972 */
973class PEPScore {
974    private Double score;
975    private Map<physicalEntityParticipant, physicalEntityParticipant> pepMap;
976
977    public PEPScore(Double score,
978                    Map<physicalEntityParticipant, physicalEntityParticipant> pepMap) {
979        this.score = score;
980        this.pepMap = pepMap;
981    }
982
983    public Double getScore() {
984        return score;
985    }
986
987    public Map<physicalEntityParticipant, physicalEntityParticipant> getPEPMap() {
988        return pepMap;
989    }
990}