package org.biopax.paxtools.controller;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.biopax.paxtools.model.BioPAXElement;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level2.*;

import java.util.*;


/**
 * This class is intended to merge and to integrate BioPAX models
 * not necessarily from the same resource - if models allow such a
 * thing. This class has very similar functionality to the controller.Merger
 * but it differs in means of merging/integrating methodology.
 *
 * Integrator iterates over all the conversions from the <em>target</em> and
 * <em>source</em> model(s), and assigns scores indicating their similarity.
 * After the scoring process is completed, it then starts integrating conversions
 * having the highest score until it reaches the <em>threshold</em> value. After
 * this conversion based integration is accomplished, all the models are merged
 * into the <em>target</em>.
 *
 * Please note that this class is in its beta state. In particular, the private
 * {@code equalize} helper currently throws {@link UnsupportedOperationException},
 * so every code path that reaches it (entity equalization, OCV normalization)
 * fails with that exception.
 */
public class Integrator {

    private static final Log log = LogFactory.getLog(Integrator.class);
    private final EditorMap editorMap;
    private final Merger merger;
    private final Model target;
    private Model mergedSources = null;

    private boolean onlyMapping = false;
    private boolean selfRemove = false;
    private boolean normalizeModels = false;

    /**
     * This is the main score matrix
     *
     *   |    D    |    E    |    F    |
     * ------------------------------
     * A |         |         |         |
     * ------------------------------
     * B |         |         |         |
     * ------------------------------
     * C |         |         |         |
     * ------------------------------
     *
     * Rows are keyed by PEPs of the target model, columns by PEPs of the
     * merged source model (see createPEPScoreMatrix).
     */
    private Map<physicalEntityParticipant,
            Map<physicalEntityParticipant, Double>> pepScoreMatrix
            = new HashMap<physicalEntityParticipant,
            Map<physicalEntityParticipant, Double>>();

    /**
     * This is the pool where the scores and relevant conversions
     * will be stored. Other than this global one, there will be
     * a local copy to enable the user to handle different threshold
     * values one at a time.
     */
    private List<ConversionScore> similarConversions;

    /** Pairs of {long database name, short database name} used by normalizeXrefs. */
    private static final String[][] dbChanges
            = {
            {"Chemical Entities of Biological Interest", "ChEBI"}
    };

    /** Lazily built set view of {@link #termLists}; filled on first use. */
    private Set<Set<String>> relatedTerms = new HashSet<Set<String>>();

    /** Groups of sequence feature terms that are treated as related to each other. */
    private static final String[][] termLists =
            {
                    {"active", "active1", "active2", "phosphorylation", "phosphate group", "phosphorylation site"},
                    {"inactive", "phosphorylation", "phosphate group", "phosphorylation site"}

            };

    /** Groups of cellular location terms that are treated as similar. */
    private static final String[][] locLists =
            {
                    {"cytoplasm", "cytosol"}
            };

    /* Globalling tricks & fine-tuning */
    private static final double SIZE_MISMATCH_PENALTY = 0.7;
    private static final double BASE_SCORE = 0.4;
    // Not a constant: adjustable through setScoresOver(double)
    private double SCORES_OVER = 100.0;
    private static final double MAX_PEP_SCORE = 3.5;

    private static final double STATS_OVER = 1000.0; // For info messages like "2/100 completed"

    private double threshold = SCORES_OVER; // Max. threshold


    /**
     * Creates an integrator. All the <em>sources</em> are merged into the first
     * source model (which is modified in place) before any scoring takes place.
     *
     * NOTE(review): the normalization step below is guarded by
     * {@link #isNormalizeModels()}, whose backing flag still has its default
     * value (false) while the constructor runs; unless that method is
     * overridden, the normalization branch is not entered here.
     *
     * @param editorMap map to be used in order to initialize merger
     * @param target target model into which integration will be done
     * @param sources source models that are going to be integrated into target
     *
     * @see org.biopax.paxtools.controller.Merger
     */
    public Integrator(EditorMap editorMap, Model target, Model... sources) {
        this.editorMap = editorMap;
        this.merger = new Merger(editorMap);
        this.target = target;

        log.info(sources.length + " source model(s) will be merged.");
        // Merge all "sources" into one single model
        for (Model source : sources) {
            if (mergedSources == null) {
                mergedSources = source;
            } else {
                merger.merge(mergedSources, source);
            }
        }
        log.info("Merging finished.");

        if (isNormalizeModels()) {
            log.info("Normalizing models.");

            log.info("Normaling XREFs.");
            normalizeXrefs(target);
            normalizeXrefs(mergedSources);
            log.info("Normaling OCVs.");
            normalizeOpenControlledVocabulary(mergedSources);
            log.info("Normaling cellular locations.");
            normalizeCellularLocations(mergedSources);

            log.info("Normalization completed.");
        }
    }

    /**
     * Sets the threshold value (the smallest score for integrating
     * two conversions)
     *
     * @param threshold value; must not be null (it is unboxed)
     *
     * @see #setScoresOver(double)
     */
    public void setThreshold(Double threshold) {
        this.threshold = threshold;
    }

    /**
     * Returns the threshold value (the smallest score for integrating
     * two conversions)
     *
     * @return a double value (default: 100.0)
     */
    public Double getThreshold() {
        return threshold;
    }

    /**
     * Enables/disables integration. If <em>only mapping</em> feature is
     * set to true, integrator will only assign scores to conversions and
     * exit. This option may help to build interactive programs.
     *
     * @param mapping true for skipping integration
     *
     * @see #integrate()
     */
    public void setOnlyMapping(boolean mapping) {
        this.onlyMapping = mapping;
    }

    /**
     *
     * @return true for enabled "only mapping", false otherwise
     *
     * @see #setOnlyMapping(boolean)
     */
    boolean isOnlyMapping() {
        return onlyMapping;
    }

    /**
     * Enables removal of elements from the <em>target</em> if they are contained
     * both in source and target, and have a match with another conversion. Useful for
     * integrating a model with itself. Default is false.
     *
     * @param selfRemove true for enabling removal, false otherwise
     */
    public void setSelfRemove(boolean selfRemove) {
        this.selfRemove = selfRemove;
    }

    /**
     *
     * @return true for enabled removal, false otherwise
     *
     * @see #setSelfRemove(boolean)
     */
    boolean isSelfRemove() {
        return selfRemove;
    }

    /**
     * Fixes some of the known Open Controlled Vocabulary issues in the models.
     * It is best to try integration with this option enabled (true) and
     * disabled (false) to see which gives a better result. Default is false.
     *
     * NOTE(review): within this class the flag is consulted only by the
     * constructor, i.e. before this setter can be called on the instance, so
     * setting it afterwards does not trigger normalization - confirm intent.
     *
     * @param normalizeModels true for normalization of OCVs
     */
    public void setNormalizeModels(boolean normalizeModels) {
        this.normalizeModels = normalizeModels;
    }

    /**
     *
     * @return true for normalization of OCVs, false otherwise (default)
     *
     * @see #setNormalizeModels(boolean)
     */
    boolean isNormalizeModels() {
        return normalizeModels;
    }


    /**
     * @see #setScoresOver(double)
     *
     * @return a double indicating maximum score
     */
    double getScoresOver() {
        return SCORES_OVER;
    }

    /**
     * A score between two conversions is in the interval (0, 1].
     * Setting a <em>scoresOver</em> value will then map this range to
     * (0, scoresOver]. Default value is 100.0, so the default score
     * range is (0,100]. This setting does not alter the integration
     * process. It only multiplies the scores with the given value.
     *
     * @param scoresOver a double score
     */
    public void setScoresOver(double scoresOver) {
        this.SCORES_OVER = scoresOver;
    }

    /**
     * Integrates <em>target</em> and <em>source</em>(s) and returns a
     * sorted (desc) list of conversion scores.
     *
     * @see #setNormalizeModels(boolean)
     * @see #setOnlyMapping(boolean)
     * @see #setScoresOver(double)
     * @see #setSelfRemove(boolean)
     * @see #setThreshold(Double)
     *
     * @return a sorted list of ConversionScores, or null if no source model was given
     */
    public List<ConversionScore> integrate() {
        return integrate(null);
    }

    /**
     * Does the integration using user-provided scores list.
     *
     * When <em>alternativeScores</em> is not null, that very list is re-scored,
     * sorted in place and returned.
     *
     * @see #integrate()
     *
     * @param alternativeScores alternative scores, can be null
     * @return a sorted list of ConversionScores, or null if no source model was given
     */
    public List<ConversionScore> integrate(List<ConversionScore> alternativeScores) {
        // There is something wrong with the sources, just quit
        if (mergedSources == null) {
            log.warn("Either target or source is empty, skipping integration.");
            return null;
        }

        log.info("Scoring all the PEPs.");
        /* If it is a first run, we need to calculate all scores,
         * but if it is not, we can save some CPU time.
         */
        if (this.pepScoreMatrix.isEmpty()) { // first run
            createPEPScoreMatrix(target.getObjects(physicalEntityParticipant.class),
                    mergedSources.getObjects(physicalEntityParticipant.class));

            log.info("Scoring PEPs finished.");

            log.info("Scoring conversions");
            this.similarConversions = createConversionScoreMap(this.pepScoreMatrix,
                    target.getObjects(conversion.class),
                    mergedSources.getObjects(conversion.class));
            log.info("Scoring conversions finished.");
        }

        if (this.similarConversions == null) {
            this.similarConversions = new ArrayList<ConversionScore>();
        }

        /* Original score matrixes won't be modified for a later use
         * Instead, we are going to copy them, and modify their copies.
         */
        log.info("Creating a copy of the PEP scores.");
        Map<physicalEntityParticipant, Map<physicalEntityParticipant, Double>> workingMatrix
                = new HashMap<physicalEntityParticipant, Map<physicalEntityParticipant, Double>>();
        // Copy the contents of the matrix (row maps are copied too)
        for (Map.Entry<physicalEntityParticipant, Map<physicalEntityParticipant, Double>> row
                : this.pepScoreMatrix.entrySet()) {
            workingMatrix.put(row.getKey(),
                    new HashMap<physicalEntityParticipant, Double>(row.getValue()));
        }
        log.info("PEP scores copied.");

        List<ConversionScore> workingScores = (alternativeScores == null)
                ? new ArrayList<ConversionScore>(this.similarConversions)
                : alternativeScores;

        log.info("Conversion scores copied.");
        /* End of copies */

        log.info("Mapping conversions/PEPs with a threshold: " + getThreshold());
        mapConversions(workingScores, workingMatrix);
        log.info("Mapping finished.");

        // Sorting is essential for #equalizeEntities. If you are to
        // modify this sort, check there also!
        log.info("Sorting scores (" + workingScores.size() + " scores).");
        Collections.sort(workingScores);
        Collections.reverse(workingScores);
        log.info("Sorting finished.");

        if (isOnlyMapping()) {
            log.info("Skipping model integration.");
        } else {
            log.info("Entities of similar conversions are being eqalized.");
            equalizeEntities(workingScores);

            log.info("Merging integrated models.");
            merger.merge(target, mergedSources);
            log.info("Merging finished.");
        }

        log.info("Integration completed.");
        return workingScores;
    }

    /**
     * Walks the (descending) score list and, for every pair scoring at least the
     * threshold, equalizes the two conversions, their matched PEPs and their
     * controls.
     *
     * @param similarConversions conversion scores sorted in descending order
     */
    private void equalizeEntities(List<ConversionScore> similarConversions) {
        Set<conversion> doNotModifySet = new HashSet<conversion>();
        Set<ConversionScore> containsSelfRemoved = new HashSet<ConversionScore>();

        for (ConversionScore convScore : similarConversions) {
            // Since we sorted the list, we are safe to break
            // But a continue will also do the trick, mostly
            // requiring little more time
            if (convScore.getScore() < getThreshold()) {
                break;
            }

            conversion conv1 = convScore.getConversion1();
            conversion conv2 = convScore.getConversion2();
            // If they are already the same, pass
            if (conv1.getRDFId().equals(conv2.getRDFId())) {
                continue;
            }

            // Do not modify it twice
            if (doNotModifySet.contains(conv2)) {
                log.info(conv2.getRDFId() + " has already been modified. Skipped.");
                continue;
            }

            // Self remove operations
            if (isSelfRemove()) {
                // Remove "conv2" from target, if the corresponding flag is set true
                BioPAXElement eqBPE = target.getByID(conv2.getRDFId());
                if (eqBPE != null) {
                    target.remove(eqBPE);
                    log.info("Self removing: " + eqBPE.getRDFId());

                    // Collect other matches of will-be-removed element.
                    for (ConversionScore tempCS : similarConversions) {
                        if (tempCS.getConversion1().equals(eqBPE)) {
                            containsSelfRemoved.add(tempCS);
                        }
                    }
                } else if (containsSelfRemoved.contains(convScore)) {
                    continue;
                }
            }

            // Three things to make equal: conversions themselves, matched PEPs, their controls
            equalize(conv1, conv2);
            if (convScore.isReverseMatch()) {
                changeDirection(conv2);
            }

            for (physicalEntityParticipant pep1 : convScore.getMatchedPEPs()) {
                physicalEntityParticipant pep2 = convScore.getMatch(pep1);
                // We got the match, now let's build sets of PEPs of equal states
                equalizePEP(pep1, pep2);
            }

            for (control control1 : conv1.isCONTROLLEDOf()) {
                for (control control2 : conv2.isCONTROLLEDOf()) {
                    boolean allSimilar = true;
                    for (physicalEntityParticipant controller1 : control1.getCONTROLLER()) {
                        for (physicalEntityParticipant controller2 : control2.getCONTROLLER()) {
                            if (getScore(controller1, controller2) > BASE_SCORE) {
                                equalizePEP(controller1, controller2);
                            } else {
                                allSimilar = false;
                            }
                        }
                    }
                    // Exactly one empty controller set would cause a false
                    // equivalence (no pair was ever compared), so exclude that case
                    boolean exactlyOneEmpty = control1.getCONTROLLER().size() == 0
                            ^ control2.getCONTROLLER().size() == 0;
                    if (allSimilar && !exactlyOneEmpty) {
                        equalize(control1, control2);

                        if (convScore.isReverseMatch() && control2 instanceof catalysis) {
                            changeDirection((catalysis) control2);
                        }
                    }
                }
            }

            // We are done with conv2
            doNotModifySet.add(conv2);
        }
    }

    /**
     * Was meant to make <em>e2</em> identical to <em>e1</em> by copying the RDF id;
     * currently always throws.
     *
     * @deprecated setRDFId is not available anymore!
     */
    private void equalize(BioPAXElement e1, BioPAXElement e2) {
        // TODO re-factoring: setRDFId is not available anymore! (We don't really want to change rdfIDs, do we?..)
        // The former implementation was: e2.setRDFId(e1.getRDFId());

        throw new UnsupportedOperationException("This needs re-factoring: bpe.setRDFId is not available anymore!");

        //TODO ? use some alternative way to store that e1 equals e2, e.g., Set<String> matched,
        //matched.add(e1.getRDFId()+e2.getRDFId()); matched.add(e2.getRDFId()+e1.getRDFId());
    }

    /**
     * Placeholder for an equality test that would honour "equalized" pairs;
     * currently always throws.
     */
    private boolean equals(BioPAXElement a, BioPAXElement b) {
        throw new UnsupportedOperationException("not implemented yet.");
        // TODO ? implement equals(BioPAXElement a, BioPAXElement b): can be smth. like the following... and use below
        //return (a == null) ? b == null : a.equals(b) || matched.contains(a.getRDFId()+b.getRDFId());
    }

    /**
     * Synchronizes the fields of <em>controller2</em> with every PEP that is in an
     * equivalent state to either argument, then equalizes the two PEPs.
     */
    private void equalizePEP(physicalEntityParticipant controller1, physicalEntityParticipant controller2) {
        // There is a special case for PEPs: we also need to update equivalent PEPs' fields
        Set<physicalEntityParticipant> tempEqvPeps = new HashSet<physicalEntityParticipant>();
        tempEqvPeps.addAll(getEquivalentsOfPEP(controller1));
        tempEqvPeps.addAll(getEquivalentsOfPEP(controller2));
        for (physicalEntityParticipant eqPep : tempEqvPeps) {
            updatePepFields(eqPep, controller2);
        }

        for (physicalEntityParticipant eqPep : tempEqvPeps) {
            updatePepFields(controller2, eqPep);
        }

        equalize(controller1, controller2);
    }

    /**
     * @return the participants of <em>onePep</em>'s physical entity that are in an
     *         equivalent state to <em>onePep</em>
     */
    private Set<physicalEntityParticipant> getEquivalentsOfPEP(physicalEntityParticipant onePep) {
        Set<physicalEntityParticipant> eqGrp = new HashSet<physicalEntityParticipant>();
        for (physicalEntityParticipant aPep : onePep.getPHYSICAL_ENTITY().isPHYSICAL_ENTITYof()) {
            if (aPep.isInEquivalentState(onePep)) {
                eqGrp.add(aPep);
            }
        }

        return eqGrp;
    }

    /**
     * Swaps L_R and R_L on the SPONTANEOUS property of a conversion; any other
     * value (including null) is left untouched.
     */
    private void changeDirection(conversion conv) {
        SpontaneousType st = conv.getSPONTANEOUS();

        if (st == SpontaneousType.L_R) {
            conv.setSPONTANEOUS(SpontaneousType.R_L);
        } else if (st == SpontaneousType.R_L) {
            conv.setSPONTANEOUS(SpontaneousType.L_R);
        }
    }

    /**
     * Flips the direction of a catalysis between the left-to-right and
     * right-to-left variants; any other value is left untouched.
     */
    private void changeDirection(catalysis cat) {
        Direction ct = cat.getDIRECTION();

        if (ct == Direction.IRREVERSIBLE_LEFT_TO_RIGHT) {
            cat.setDIRECTION(Direction.IRREVERSIBLE_RIGHT_TO_LEFT);
        } else if (ct == Direction.IRREVERSIBLE_RIGHT_TO_LEFT) {
            cat.setDIRECTION(Direction.IRREVERSIBLE_LEFT_TO_RIGHT);
        } else if (ct == Direction.PHYSIOL_LEFT_TO_RIGHT) {
            cat.setDIRECTION(Direction.PHYSIOL_RIGHT_TO_LEFT);
        } else if (ct == Direction.PHYSIOL_RIGHT_TO_LEFT) {
            cat.setDIRECTION(Direction.PHYSIOL_LEFT_TO_RIGHT);
        }
    }

    /**
     * For every conversion pair scoring at least the threshold, raises the score
     * of its matched PEPs to the maximum in <em>pepScoreMatrix</em> and then
     * replaces the pair's entry with a freshly computed score.
     *
     * @param similarConversions scores to process; modified in place
     * @param pepScoreMatrix working copy of the PEP score matrix; modified in place
     */
    private void mapConversions(Collection<ConversionScore> similarConversions,
                                Map<physicalEntityParticipant,
                                        Map<physicalEntityParticipant, Double>> pepScoreMatrix) {
        // Collected separately to avoid modifying the collection while iterating it
        Set<ConversionScore> toBeUpdated = new HashSet<ConversionScore>();

        for (ConversionScore convScore : similarConversions) {
            // Check if the score is equal to or higher than the threshold
            if (convScore.getScore() < getThreshold()) {
                continue;
            }

            // Get matches of PEPs of first conversion
            for (physicalEntityParticipant pep1 : convScore.getMatchedPEPs()) {
                physicalEntityParticipant pep2 = convScore.getMatch(pep1);

                // We got the match, now set their score to max
                pepScoreMatrix.get(pep1).put(pep2, MAX_PEP_SCORE);
            }

            // Remember this
            toBeUpdated.add(convScore);
        }

        // Now we know which scores are affected, let's replace them
        for (ConversionScore convScore : toBeUpdated) {
            // Remove it from similarConversions
            similarConversions.remove(convScore);

            // Add new score
            similarConversions.add(getScore(pepScoreMatrix,
                    convScore.getConversion1(),
                    convScore.getConversion2()));
        }

    }

    /**
     * Scores every pair of conversions of the same kind (biochemicalReaction,
     * complexAssembly or transport) that do not share an RDF id.
     *
     * @return the (unsorted) list of scores
     */
    private List<ConversionScore> createConversionScoreMap(Map<physicalEntityParticipant,
            Map<physicalEntityParticipant, Double>> pepScoreMatrix,
                                                           Set<conversion> convSet1, Set<conversion> convSet2) {
        List<ConversionScore> scores = new ArrayList<ConversionScore>();

        // Widen before multiplying: the product of two int sizes may overflow
        double totalSize = (double) convSet1.size() * convSet2.size();
        // Number of pairs between two progress messages
        double step = Math.ceil(totalSize / STATS_OVER);
        double convCnt = 0;

        for (conversion conv1 : convSet1) {
            for (conversion conv2 : convSet2) {
                // No need to compare conversions of different types
                boolean sameKind
                        = (conv1 instanceof biochemicalReaction && conv2 instanceof biochemicalReaction)
                        || (conv1 instanceof complexAssembly && conv2 instanceof complexAssembly)
                        || (conv1 instanceof transport && conv2 instanceof transport);
                if (!sameKind) {
                    convCnt++;
                    continue;
                }

                if (conv1.getRDFId().equals(conv2.getRDFId())) { // If they are the same
                    convCnt++;
                    continue;
                }

                ConversionScore convScore = getScore(pepScoreMatrix, conv1, conv2);
                scores.add(convScore);

                if (convCnt % step == 0) {
                    log.info(" - " + (convCnt / step)
                            + "/" + STATS_OVER + " completed.");
                }

                convCnt++;

            }
        }

        return scores;
    }

    /**
     * Fills the {@link #pepScoreMatrix} field: one row per PEP of <em>pepSet1</em>,
     * holding a score for each PEP of <em>pepSet2</em> that passes
     * complexScoreHelper.
     */
    private void createPEPScoreMatrix(Collection<physicalEntityParticipant> pepSet1,
                                      Collection<physicalEntityParticipant> pepSet2) {

        // If it is not empty, no need to calculate it again
        assert pepScoreMatrix.isEmpty();
        // Widen before multiplying: the product of two int sizes may overflow
        double totalSize = (double) pepSet1.size() * pepSet2.size();
        // Number of pairs between two progress messages
        double step = Math.ceil(totalSize / STATS_OVER);

        double pepCnt = 0;
        for (physicalEntityParticipant pep1 : pepSet1) {
            // Create a new row for a PEP
            Map<physicalEntityParticipant, Double> pep1Row
                    = new HashMap<physicalEntityParticipant, Double>();
            pepScoreMatrix.put(pep1, pep1Row);

            // Fill the row with the corresponding scores
            for (physicalEntityParticipant pep2 : pepSet2) {
                if (complexScoreHelper(pep1.getPHYSICAL_ENTITY(),
                        pep2.getPHYSICAL_ENTITY())) {
                    Double score = getScore(pep1, pep2);
                    pep1Row.put(pep2, score);
                }

                if (pepCnt % step == 0) {
                    log.info(" - " + (pepCnt / step)
                            + "/" + STATS_OVER + " completed.");
                }

                pepCnt++;
            }
        }

    }

    /**
     * Recursive relatedness test between two physical entities that looks into
     * the components of complexes. Note that the argument order is swapped on
     * recursion in the complex/complex case.
     */
    private boolean complexScoreHelper(physicalEntity cPe, physicalEntity pe) {
        if (cPe instanceof complex && pe instanceof complex) {
            for (physicalEntityParticipant tmpPep : ((complex) cPe).getCOMPONENTS()) {
                if (!complexScoreHelper(pe, tmpPep.getPHYSICAL_ENTITY())) {
                    return false;
                }
            }
            return true;
        } else if (cPe instanceof complex) {
            for (physicalEntityParticipant tmpPep : ((complex) cPe).getCOMPONENTS()) {
                if (complexScoreHelper(tmpPep.getPHYSICAL_ENTITY(), pe)) {
                    return true;
                }
            }
            return false;
        } else {
            return cPe.equals(pe);
        }
    }

    /**
     * Scores the similarity of two PEPs: an entity part (2.5, 2.35 or 2) plus a
     * state part (1 or 0.8), or {@code BASE_SCORE} when they are unrelated.
     */
    private Double getScore(physicalEntityParticipant pep1,
                            physicalEntityParticipant pep2) {
        double totalScore = .0;

        physicalEntity pe1 = pep1.getPHYSICAL_ENTITY();
        physicalEntity pe2 = pep2.getPHYSICAL_ENTITY();

        // Exactly one of them is a sequence participant, and they are not both small molecules
        if ((pep1 instanceof sequenceParticipant ^ pep2 instanceof sequenceParticipant)
                && !(pe1 instanceof smallMolecule && pe2 instanceof smallMolecule)) {
            return BASE_SCORE;
        }

        if (pe1.equals(pe2)) {
            totalScore += 2.5;
        } else if (complexScoreHelper(pe1, pe2) && complexScoreHelper(pe2, pe1)) {
            totalScore += 2.35;
        } else if (complexScoreHelper(pe1, pe2) || complexScoreHelper(pe2, pe1)) {
            totalScore += 2;
        } else {
            return BASE_SCORE;
        }

        if (pep1.isInEquivalentState(pep2)) {
            totalScore += 1;
        } else if (isSeqParTermsSimilar(pep1, pep2)) {
            totalScore += .8;
        } else if (isCellularLocsSimilar(pep1, pep2)) {
            totalScore += .8;
        }

        return totalScore;
    }

    /**
     * @return true if some term of <em>fTerms</em> and some term of <em>sTerms</em>
     *         appear together in one of the {@link #locLists} groups
     */
    private boolean isCellularLocsTermsSimilar(Set<String> fTerms, Set<String> sTerms) {
        for (String[] locList : locLists) {
            List<String> group = Arrays.asList(locList);
            for (String fterm : fTerms) {
                for (String sterm : sTerms) {
                    if (group.contains(fterm) && group.contains(sterm)) {
                        return true;
                    }
                }
            }
        }

        return false;
    }

    /**
     * @return true if at least one of the PEPs has no cellular location, or if
     *         their location terms are similar
     */
    private boolean isCellularLocsSimilar(physicalEntityParticipant fPep,
                                          physicalEntityParticipant sPep) {
        if (fPep.getCELLULAR_LOCATION() == null || sPep.getCELLULAR_LOCATION() == null) {
            return true;
        }
        return isCellularLocsTermsSimilar(fPep.getCELLULAR_LOCATION().getTERM(),
                sPep.getCELLULAR_LOCATION().getTERM());
    }

    /**
     * @return true if both PEPs are sequence participants carrying feature-type
     *         terms that appear together in one of the {@link #termLists} groups
     */
    private boolean isSeqParTermsSimilar(physicalEntityParticipant fPep,
                                         physicalEntityParticipant sPep) {
        // Build the set view of the term lists on first use
        if (relatedTerms.isEmpty()) {
            for (String[] termL : termLists) {
                Set<String> termSet = new HashSet<String>();
                termSet.addAll(Arrays.asList(termL));
                relatedTerms.add(termSet);
            }
        }

        if (fPep instanceof sequenceParticipant
                && sPep instanceof sequenceParticipant) {
            for (sequenceFeature fsf : ((sequenceParticipant) fPep).getSEQUENCE_FEATURE_LIST()) {
                for (sequenceFeature ssf : ((sequenceParticipant) sPep).getSEQUENCE_FEATURE_LIST()) {
                    if (fsf.getFEATURE_TYPE() == null || ssf.getFEATURE_TYPE() == null) {
                        continue;
                    }
                    for (Set<String> similarTerm : relatedTerms) {
                        for (String fterm : fsf.getFEATURE_TYPE().getTERM()) {
                            for (String sterm : ssf.getFEATURE_TYPE().getTERM()) {
                                if (similarTerm.contains(fterm) && similarTerm.contains(sterm)) {
                                    return true;
                                }
                            }
                        }
                    }
                }
            }
        }

        return false;
    }

    /**
     * Scores two sets of PEPs (one side of two conversions). Every PEP of the
     * smaller set is matched to its best-scoring partner in the other set; the
     * product of these scores is normalized by the maximum possible score and
     * penalized for size mismatch.
     *
     * @param pepScoreMatrix matrix whose rows are keyed by PEPs of <em>PEPs1</em>'s model
     * @return the score together with the PEPs1 -> PEPs2 mapping
     */
    private PEPScore getScore(Map<physicalEntityParticipant,
            Map<physicalEntityParticipant, Double>> pepScoreMatrix,
                              Set<physicalEntityParticipant> PEPs1, Set<physicalEntityParticipant> PEPs2) {
        double finalScore = 1.0;

        // This is the 1-to-1 mapping of the PEPs
        // PEPs1 -> PEPs2
        Map<physicalEntityParticipant, physicalEntityParticipant> pepMap
                = new HashMap<physicalEntityParticipant, physicalEntityParticipant>();

        /*
         * If the second set is not larger than the first one,
         * then because of the scoring algorithm, the matrix
         * should be used transposed.
         */
        boolean transposeMatrix;
        Set<physicalEntityParticipant> firstSet, secondSet;
        int minSize, sizeDiff;

        if (PEPs2.size() > PEPs1.size()) {
            transposeMatrix = false;
            firstSet = PEPs1;
            secondSet = PEPs2;
        } else {
            transposeMatrix = true;
            firstSet = PEPs2;
            secondSet = PEPs1;
        }

        sizeDiff = secondSet.size() - firstSet.size();
        // Extra penalty for one-side-conversions (e.g. ubiquination)
        minSize = firstSet.size() == 0 ? secondSet.size() : firstSet.size();

        for (physicalEntityParticipant pep1 : firstSet) {
            // We're gonna fill the map with scores, and get the maximum
            Map<Double, physicalEntityParticipant> scoreSet
                    = new HashMap<Double, physicalEntityParticipant>();

            for (physicalEntityParticipant pep2 : secondSet) {
                // Row/column roles are swapped when the matrix is used transposed
                physicalEntityParticipant rowPep = transposeMatrix ? pep2 : pep1;
                physicalEntityParticipant colPep = transposeMatrix ? pep1 : pep2;

                Double pepScore;
                if (complexScoreHelper(rowPep.getPHYSICAL_ENTITY(), colPep.getPHYSICAL_ENTITY())) {
                    pepScore = pepScoreMatrix.get(rowPep).get(colPep);
                } else {
                    pepScore = BASE_SCORE;
                }

                scoreSet.put(pepScore, pep2);
            }

            // We have the scores, let's get the maximum
            Double maxScore = Collections.max(scoreSet.keySet());

            // We know the best match, multiply its score with the finalScore
            finalScore *= maxScore;

            // Check for transposed matrix
            if (transposeMatrix) {
                pepMap.put(scoreSet.get(maxScore), pep1);
            } else {
                pepMap.put(pep1, scoreSet.get(maxScore));
            }
        }

        // Here comes the last edit to final score
        finalScore = (finalScore / Math.pow(MAX_PEP_SCORE, minSize)) // Rate actual score over max.
                * Math.pow(SIZE_MISMATCH_PENALTY, sizeDiff); // Give penalty for size mismatches

        return new PEPScore(finalScore, pepMap);
    }

    /**
     * Scores two conversions by comparing their sides both "straight"
     * (left-left, right-right) and "reverse" (left-right, right-left) and
     * keeping the better alternative, scaled by {@link #getScoresOver()}.
     */
    private ConversionScore getScore(Map<physicalEntityParticipant,
            Map<physicalEntityParticipant, Double>> pepScoreMatrix,
                                     conversion conv1, conversion conv2) {
        boolean reverseMatch;
        Double score;
        Map<physicalEntityParticipant, physicalEntityParticipant> pepMap
                = new HashMap<physicalEntityParticipant, physicalEntityParticipant>();

        // left-to-left, right-to-right, left-to-right, right-to-left
        PEPScore l_l, r_r, l_r, r_l;

        /* Two possibilities for a match, check for them and get the best match */

        // 1# left->left , right->right (aka "straight")
        l_l = getScore(pepScoreMatrix, conv1.getLEFT(), conv2.getLEFT());
        r_r = getScore(pepScoreMatrix, conv1.getRIGHT(), conv2.getRIGHT());
        Double straightScore = l_l.getScore() * r_r.getScore();

        // 2# left->right , right->left (aka "reverse")
        l_r = getScore(pepScoreMatrix, conv1.getLEFT(), conv2.getRIGHT());
        r_l = getScore(pepScoreMatrix, conv1.getRIGHT(), conv2.getLEFT());
        Double reverseScore = l_r.getScore() * r_l.getScore();

        if (straightScore >= reverseScore) { // Straight match
            reverseMatch = false;
            score = straightScore;
            pepMap.putAll(l_l.getPEPMap());
            pepMap.putAll(r_r.getPEPMap());
        } else { // Reverse match
            reverseMatch = true;
            score = reverseScore;
            pepMap.putAll(l_r.getPEPMap());
            pepMap.putAll(r_l.getPEPMap());
        }
        score *= getScoresOver(); // (0,1] -> (0, Scores Over]
        return new ConversionScore(conv1, conv2, score, pepMap, reverseMatch);
    }


    /* Update functions below are modified to fulfill required object editor
       modifying on the PEPs.
     */

    /**
     * Copies field values from <em>update</em> to <em>existing</em>, unless exactly
     * one of the two is a sequenceParticipant.
     */
    private void updatePepFields(physicalEntityParticipant update,
                                 physicalEntityParticipant existing) {
        if (!(update instanceof sequenceParticipant ^ existing instanceof sequenceParticipant)) {
            updateObjectFields(update, existing);
        }
    }

    /**
     * Applies every property editor of <em>update</em>, except the one for
     * "PHYSICAL-ENTITY", to copy values onto <em>existing</em>.
     */
    private void updateObjectFields(BioPAXElement update, BioPAXElement existing) {
        Set<PropertyEditor> editors = editorMap.getEditorsOf(update);

        for (PropertyEditor editor : editors) {
            if (!editor.getProperty().equals("PHYSICAL-ENTITY")) {
                updateObjectFieldsForEditor(editor, update, existing);
            }

        }
    }

    /**
     * Copies the values of one property from <em>update</em> to <em>existing</em>,
     * skipping BioPAX element values that already have an equivalent there.
     */
    private void updateObjectFieldsForEditor(PropertyEditor editor,
                                             BioPAXElement update,
                                             BioPAXElement existing) {

        for (Object updateValue : editor.getValueFromBean(update)) {
            boolean notDuplicate = true;

            try {
                if (updateValue instanceof BioPAXElement) {
                    for (Object existingValue : editor.getValueFromBean(existing)) {
                        if (((BioPAXElement) existingValue).isEquivalent((BioPAXElement) updateValue)) {
                            notDuplicate = false;
                            break;
                        }
                    }
                }
            } catch (IllegalArgumentException e) {
                // Deliberately best-effort: the value is then treated as not duplicate
                log.info("Empty property on bean, skipping...");
            }

            if (notDuplicate) {
                updateField(editor, updateValue, existing);
            }
        }

    }

    /** Sets a single value on <em>existing</em> through the given editor. */
    private void updateField(PropertyEditor editor, Object updateValue,
                             BioPAXElement existing) {
        editor.setValueToBean(updateValue, existing); //TODO:TEST
    }

    /* End of update functions */

    /* Methods below are temporary but manual normalization for the time being */

    /**
     * Rewrites the DB name of every xref in <em>model</em> according to
     * {@link #dbChanges}.
     */
    private void normalizeXrefs(Model model) {
        for (xref oneXref : model.getObjects(xref.class)) {
            for (String[] dbChange : dbChanges) {
                if (oneXref.getDB() != null) {
                    oneXref.setDB(oneXref.getDB().replace(dbChange[0], dbChange[1]));
                }
            }
        }
    }

    /**
     * Equalizes semantically equivalent OCVs, first between the target and
     * <em>model</em>, then within <em>model</em> itself.
     */
    private void normalizeOpenControlledVocabulary(Model model) {
        for (openControlledVocabulary ocv1 : target.getObjects(openControlledVocabulary.class)) {
            for (openControlledVocabulary ocv2 : model.getObjects(openControlledVocabulary.class)) {
                if (isOCVsSemanticallyEquivalent(ocv1, ocv2)) {
                    equalize(ocv1, ocv2);
                }
            }
        }
        for (openControlledVocabulary ocv1 : model.getObjects(openControlledVocabulary.class)) {
            for (openControlledVocabulary ocv2 : model.getObjects(openControlledVocabulary.class)) {
                if (isOCVsSemanticallyEquivalent(ocv1, ocv2)) {
                    equalize(ocv1, ocv2);
                }
            }
        }
    }

    /**
     * @return true if the OCVs are equal, share a term, or (when both have
     *         xrefs) share a unification xref
     */
    private boolean isOCVsSemanticallyEquivalent(openControlledVocabulary ocv1, openControlledVocabulary ocv2) {
        if (ocv1.equals(ocv2)) {
            return true;
        }
        if (ocv1.getXREF().isEmpty() || ocv2.getXREF().isEmpty()) {
            return OCVsHaveCommonTerm(ocv1, ocv2);
        }
        return !ocv1.findCommonUnifications(ocv2).isEmpty()
                || OCVsHaveCommonTerm(ocv1, ocv2);
    }

    /** @return true if the two OCVs have at least one term in common */
    private boolean OCVsHaveCommonTerm(openControlledVocabulary ocv1, openControlledVocabulary ocv2) {
        for (String s : ocv1.getTERM()) {
            if (ocv2.getTERM().contains(s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the cellular location used by the most PEPs of the target and
     * applies it to the PEPs of <em>model</em> that have no location, an empty
     * location, or a location with similar terms.
     */
    private void normalizeCellularLocations(Model model) {
        openControlledVocabulary mostlyUsed = null;
        int maxOccurence = 0;

        Map<openControlledVocabulary, Integer> termCounter
                = new HashMap<openControlledVocabulary, Integer>();
        for (physicalEntityParticipant pep : target.getObjects(physicalEntityParticipant.class)) {
            openControlledVocabulary ov = pep.getCELLULAR_LOCATION();
            if (ov == null) {
                continue;
            }

            // The incremented count has to be stored back and the running maximum
            // tracked; otherwise every location looks like it occurred once
            Integer previous = termCounter.get(ov);
            int cnt = (previous == null) ? 1 : previous + 1;
            termCounter.put(ov, cnt);

            if (cnt > maxOccurence) {
                maxOccurence = cnt;
                mostlyUsed = ov;
            }
        }

        if (mostlyUsed == null) {
            return;
        }

        // Iterate over a copy: model.addNew below may change the model's object set
        List<physicalEntityParticipant> pepList = new ArrayList<physicalEntityParticipant>(
                model.getObjects(physicalEntityParticipant.class));

        for (physicalEntityParticipant pep : pepList) {
            openControlledVocabulary ov = pep.getCELLULAR_LOCATION();

            if (ov == null) {
                if (model.getByID(mostlyUsed.getRDFId()) == null) {
                    ov = model.addNew(openControlledVocabulary.class, mostlyUsed.getRDFId());
                    ov.setCOMMENT(mostlyUsed.getCOMMENT());
                    ov.setTERM(mostlyUsed.getTERM());
                    ov.setXREF(mostlyUsed.getXREF());
                } else {
                    ov = (openControlledVocabulary) model.getByID(mostlyUsed.getRDFId());
                }

                pep.setCELLULAR_LOCATION(ov);

            } else if (ov.getTERM().isEmpty()) {
                ov.setTERM(mostlyUsed.getTERM());
            } else if (isCellularLocsTermsSimilar(ov.getTERM(), mostlyUsed.getTERM())) {
                ov.setTERM(mostlyUsed.getTERM());
                ov.setXREF(mostlyUsed.getXREF());
            }
        }
    }

    /* End of normalization methods */

}

/**
 * An encapsulation of the score and pep map
 */
class PEPScore {
    private final Double score;
    private final Map<physicalEntityParticipant, physicalEntityParticipant> pepMap;

    /**
     * @param score similarity score of two PEP sets
     * @param pepMap mapping between the matched PEPs of the two sets
     */
    public PEPScore(Double score,
                    Map<physicalEntityParticipant, physicalEntityParticipant> pepMap) {
        this.score = score;
        this.pepMap = pepMap;
    }

    /** @return the similarity score */
    public Double getScore() {
        return score;
    }

    /** @return the mapping between matched PEPs (the instance given at construction) */
    public Map<physicalEntityParticipant, physicalEntityParticipant> getPEPMap() {
        return pepMap;
    }
}