001package org.biopax.paxtools.controller; 002 003import org.apache.commons.logging.Log; 004import org.apache.commons.logging.LogFactory; 005import org.biopax.paxtools.io.BioPAXIOHandler; 006import org.biopax.paxtools.io.SimpleIOHandler; 007import org.biopax.paxtools.model.BioPAXElement; 008import org.biopax.paxtools.model.BioPAXFactory; 009import org.biopax.paxtools.model.BioPAXLevel; 010import org.biopax.paxtools.model.Model; 011import org.biopax.paxtools.model.level3.*; 012import org.biopax.paxtools.model.level3.Process; 013import org.biopax.paxtools.util.*; 014 015import java.io.ByteArrayInputStream; 016import java.io.ByteArrayOutputStream; 017import java.security.MessageDigest; 018import java.security.NoSuchAlgorithmException; 019import java.util.*; 020import java.util.concurrent.ExecutorService; 021import java.util.concurrent.Executors; 022import java.util.concurrent.TimeUnit; 023 024/** 025 * Several useful algorithms and examples, e.g., to extract root or child 026 * BioPAX L3 elements, remove dangling elements, replace elements 027 * or URIs, fix/infer property values, etc. 028 * 029 * NOTE: although this is a public class with public methods, 030 * this class can be (and has been already) modified (sometimes considerably) 031 * in every minor revision; it was not designed to be Paxtools' public API... 032 * So, we encourage users to copy some methods to their own apps rather than 033 * depend on this unstable utility class in the long term. 034 * 035 * @author rodche, Arman, Emek 036 */ 037public final class ModelUtils 038{ 039 private static final Log LOG = LogFactory.getLog(ModelUtils.class); 040 041 /** 042 * Package-private constructor 043 * 044 * @throws AssertionError always (i.e., if called via java reflection) 045 */ 046 ModelUtils() { 047 throw new AssertionError("Not instantiable"); 048 } 049 050 051 static final MessageDigest MD5_DIGEST; 052 053 /** 054 * Initializer. 
055 */ 056 static { 057 try { 058 MD5_DIGEST = MessageDigest.getInstance("MD5"); 059 } catch (NoSuchAlgorithmException e) { 060 throw new RuntimeException("Cannot instantiate MD5 MessageDigest!", e); 061 } 062 } 063 064 private final static BioPAXFactory factory = BioPAXLevel.L3.getDefaultFactory(); 065 066 private final static EditorMap em = SimpleEditorMap.L3; 067 068 private final static BioPAXIOHandler io = new SimpleIOHandler(BioPAXLevel.L3); 069 070 071 static 072 { 073 ((SimpleIOHandler) io).mergeDuplicates(true); 074 ((SimpleIOHandler) io).normalizeNameSpaces(false); 075 } 076 077 078 /** 079 * Replaces BioPAX elements in the model with ones from the map, 080 * updates corresponding BioPAX object references. 081 * 082 * It does not neither remove the old nor add new elements in the model 083 * (if required, one can do this before/after this method, e.g., using 084 * the same 'subs' map) 085 * 086 * This does visit all object properties of each "explicit" element 087 * in the model, but does not traverse deeper into one's sub-properties 088 * to replace something there as well (e.g., nested member entity references 089 * are not replaced unless parent entity reference present in the model) 090 * 091 * This does not automatically move/migrate old (replaced) object's 092 * children to new objects (the replacement ones are supposed to have 093 * their own properties already set or to be set shortly; otherwise, 094 * consider using of something like {@link #fixDanglingInverseProperties(BioPAXElement, Model)} after. 095 * 096 * @param model biopax model where the objects are to be replaced 097 * @param subs the replacements map (many-to-one, old-to-new) 098 * @exception IllegalBioPAXArgumentException if there is an incompatible type replacement object 099 */ 100 public static void replace(Model model, final Map<? extends BioPAXElement, ? 
extends BioPAXElement> subs) 101 { 102 // update properties 103 Visitor visitor = new Visitor() 104 { 105 @Override 106 public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor editor) 107 { 108 if (editor instanceof ObjectPropertyEditor && range != null && subs.containsKey(range)) 109 { 110 BioPAXElement value = (BioPAXElement) range; 111 // 'value' is to be replaced with the 'replacement' 112 BioPAXElement replacement = subs.get(range); //can get null (ok) 113 114 // normal biopax property - 115 if (replacement != null && !editor.getRange().isInstance(replacement)) 116 { 117 throw new IllegalBioPAXArgumentException( 118 "Incompatible type! Attempted to replace " 119 + value.getRDFId() + " (" + value.getModelInterface().getSimpleName() 120 + ") with " + replacement.getRDFId() + " (" 121 + replacement.getModelInterface().getSimpleName() + "); " 122 + "property: " + editor.getProperty() 123 + " of bean: " + domain.getRDFId() + " (" 124 + domain.getModelInterface().getSimpleName() + ")"); 125 } 126 127 if (replacement != value) 128 { 129 editor.removeValueFromBean(value, domain); 130 editor.setValueToBean(replacement, domain); 131 } else { 132 LOG.debug("replace: skipped the identical: " + replacement.getRDFId()); 133 } 134 } 135 } 136 }; 137 138 Traverser traverser = new Traverser(em, visitor); 139 for (BioPAXElement bpe : new HashSet<BioPAXElement>(model.getObjects())) 140 { 141 // update object properties and clear inverse properties using 'subs' map 142 traverser.traverse(bpe, null); //model is not needed 143 } 144 } 145 146 147 /** 148 * Finds "root" BioPAX objects that belong to a particular class (incl. sub-classes) 149 * in the model. 150 * 151 * Note: however, such "root" elements may or may not be, a property of other 152 * elements, not included in the model. 
153 * @param model biopax model to work with 154 * @param filterClass filter class (including subclasses) 155 * @param <T> biopax type 156 * @return set of the root biopax objects of given type 157 */ 158 public static <T extends BioPAXElement> Set<T> getRootElements(final Model model, final Class<T> filterClass) 159 { 160 // copy all such elements (initially, we think all are roots...) 161 final Set<T> result = new HashSet<T>(model.getObjects(filterClass)); 162 163 //"shallow" traverser (direct object properties only - Visitor.visit does not call traverse again) 164 @SuppressWarnings("unchecked") 165 Traverser traverser = new Traverser(em, 166 new Visitor() { 167 @Override 168 public void visit(BioPAXElement parent, Object value, Model model, 169 PropertyEditor<?, ?> editor) 170 { 171 if (filterClass.isInstance(value)) result.remove(value); 172 } 173 }, 174 new Filter<PropertyEditor>() { 175 @Override 176 public boolean filter(PropertyEditor pe) { 177 return (pe instanceof ObjectPropertyEditor); 178 } 179 }); 180 181 // but we run from every element (all types) 182 for(BioPAXElement e : model.getObjects()) 183 traverser.traverse(e, null); 184 185 return result; 186 } 187 188 189 /** 190 * Iteratively removes "dangling" elements of given type and its sub-types, 191 * e.g. Xref.class objects, from the BioPAX model. 192 * 193 * If the "model" does not contain any root Entity class objects, 194 * and the second parameter is basic UtilityClass.class (i.e., not its sub-class), 195 * then it simply logs a warning and quits shortly (otherwise, it would 196 * remove everything from the model). Do not use basic Entity.class either 197 * (but a sub-class is OK) for the same reason (it would delete everything). 198 * 199 * This, however, does not change relationships 200 * among objects, particularly, some inverse properties, 201 * such as entityReferenceOf or xrefOf, may still 202 * refer to a removed object. 
203 * @param model to modify 204 * @param clazz filter-class (filter by this type and sub-classes) 205 * @param <T> biopax type 206 * @return removed objects 207 */ 208 public static <T extends BioPAXElement> Set<BioPAXElement> removeObjectsIfDangling(Model model, Class<T> clazz) 209 { 210 final Set<BioPAXElement> removed = new HashSet<BioPAXElement>(); 211 212 // 'equals' below is used intentionally (isAssignableFrom() would be incorrect) 213 if(Entity.class.equals(clazz)) { 214 LOG.warn("Ignored removeObjectsIfDangling call for: " + 215 "Entity.class (it would delete all)"); 216 return removed; 217 } 218 if(UtilityClass.class.equals(clazz) 219 && getRootElements(model, Entity.class).isEmpty()) 220 { 221 LOG.warn("Ignored removeObjectsIfDangling call: " + 222 "no root entities model; UtilityClass.class"); 223 return removed; 224 } 225 226 Set<T> dangling = getRootElements(model, clazz); 227 228 // get rid of dangling objects 229 if (!dangling.isEmpty()) 230 { 231 LOG.info(dangling.size() + " " + clazz.getSimpleName() + 232 " dangling objects will be deleted..."); 233 234 for (BioPAXElement thing : dangling) 235 { 236 model.remove(thing); 237 removed.add(thing); 238 LOG.debug("removed (dangling) " + thing.getRDFId() + " (" 239 + thing.getModelInterface().getSimpleName() + ") " + thing); 240 } 241 242 // some may have become dangling now, so check again... 243 removed.addAll(removeObjectsIfDangling(model, clazz)); 244 } 245 246 return removed; 247 } 248 249 250 /** 251 * Cuts the BioPAX model off other models and BioPAX objects 252 * by essentially performing write/read to/from OWL. 253 * The resulting model contains new objects with same IDs 254 * and have object properties "fixed", i.e., dangling values 255 * become null/empty, and inverse properties (e.g. xrefOf) 256 * re-calculated. The original model is unchanged. 257 * 258 * Note: this method will fail for very large models 259 * (if resulting RDF/XML utf8 string is longer than approx. 
1Gb) 260 * 261 * @param model biopax model to process 262 * @return copy of the model 263 */ 264 public static Model writeRead(Model model) 265 { 266 BioPAXIOHandler io = new SimpleIOHandler(model.getLevel()); 267 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 268 io.convertToOWL(model, baos); 269 return io.convertFromOWL(new ByteArrayInputStream(baos.toByteArray())); 270 } 271 272 273 /** 274 * Gets direct children of a given BioPAX element 275 * and adds them to a new model. 276 * @param bpe biopax element/object 277 * @return new model 278 */ 279 public static Model getDirectChildren(BioPAXElement bpe) 280 { 281 Model m = factory.createModel(); 282 283 Traverser traverser = new Traverser(em, new Visitor() { 284 @Override 285 public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor<?,?> editor) 286 { 287 if (range instanceof BioPAXElement && !model.containsID(((BioPAXElement) range).getRDFId())) 288 model.add((BioPAXElement) range); 289 } 290 }); 291 292 traverser.traverse(bpe, m); 293 294 return m; 295 } 296 297 298 /** 299 * Gets all the child BioPAX elements of a given BioPAX element 300 * (using the "tuned" {@link Fetcher}) and adds them to a 301 * new model. 302 * @param bpe biopax object 303 * @param filters property filters (e.g., for Fetcher to skip some properties). Default is to skip 'nextStep'. 304 * @return new biopax Model that contain all the child objects 305 */ 306 public static Model getAllChildren(BioPAXElement bpe, 307 @SuppressWarnings("rawtypes") Filter<PropertyEditor>... filters) 308 { 309 Model m = factory.createModel(); 310 if (filters.length == 0) 311 { 312 new Fetcher(em, Fetcher.nextStepFilter).fetch(bpe, m); 313 } else 314 { 315 new Fetcher(em, filters).fetch(bpe, m); 316 } 317 m.remove(bpe); // remove the parent 318 319 return m; 320 } 321 322 /** 323 * Collects direct children of a given BioPAX element. 
324 * @param bpe biopax object (parent) 325 * @return set of child biopax objects 326 */ 327 public static Set<BioPAXElement> getDirectChildrenAsSet(BioPAXElement bpe) 328 { 329 final Set<BioPAXElement> toReturn = new HashSet<BioPAXElement>(); 330 331 Traverser traverser = new Traverser(em, new Visitor() { 332 @Override 333 public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor<?, ?> editor) { 334 if (range instanceof BioPAXElement) { 335 toReturn.add((BioPAXElement) range); 336 } 337 } 338 } 339 ); 340 341 traverser.traverse(bpe, null); 342 343 return toReturn; 344 } 345 346 347 /** 348 * Generates simple counts of different elements in the model. 349 * 350 * @param model biopax model to analyze 351 * @return a biopax types - to counts of objects of each type map 352 */ 353 public static Map<Class<? extends BioPAXElement>, Integer> generateClassMetrics(Model model) 354 { 355 Map<Class<? extends BioPAXElement>, Integer> metrics = new HashMap<Class<? extends BioPAXElement>, Integer>(); 356 for (BioPAXElement bpe : model.getObjects()) 357 { 358 Integer count = metrics.get(bpe.getModelInterface()); 359 if (count == null) 360 { 361 count = 1; 362 } else 363 { 364 count = count + 1; 365 } 366 metrics.put(bpe.getModelInterface(), count); 367 } 368 return metrics; 369 } 370 371 372 /** 373 * A more strict, type-safe way to ask for a biopax object 374 * from the model, unlike {@link Model#getByID(String)}. 
375 * 376 * @param model biopax model to query 377 * @param uri absolute URI of a biopax element 378 * @param clazz class-filter (to filter by the biopax type and its sub-types) 379 * @param <T> biopax type 380 * @return the biopax object or null (if no such element, or element with this URI is of incompatible type) 381 */ 382 public static <T extends BioPAXElement> T getObject(Model model, String uri, Class<T> clazz) 383 { 384 BioPAXElement bpe = model.getByID(uri); 385 if (clazz.isInstance(bpe)) 386 { 387 return (T) bpe; 388 } else 389 { 390 return null; 391 } 392 } 393 394 395 /** 396 * Calculates MD5 hash code (as 32-byte hex. string). 397 * 398 * This method is not BioPAX specific. Can be 399 * used for many purposes, such as generating 400 * new unique URIs, database primary keys, etc. 401 * 402 * 403 * @param id some identifier, e.g., URI 404 * @return the 32-byte digest string 405 */ 406 public static String md5hex(String id) 407 { 408 byte[] digest = MD5_DIGEST.digest(id.getBytes()); 409 StringBuffer sb = new StringBuffer(); 410 for (byte b : digest) 411 { 412 sb.append(Integer.toHexString((int) (b & 0xff) | 0x100).substring(1, 3)); 413 } 414 String hex = sb.toString(); 415 return hex; 416 } 417 418 419 /** 420 * Unlinks <em>object properties</em> of the BioPAX object 421 * from values the model does not have. 
422 * 423 * @param bpe a biopax object 424 * @param model the model to look for objects in 425 */ 426 public static void fixDanglingObjectProperties(BioPAXElement bpe, Model model) 427 { 428 final Visitor visitor = new Visitor() 429 { 430 @Override 431 public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor editor) 432 { 433 if (editor instanceof ObjectPropertyEditor) 434 { 435 BioPAXElement value = (BioPAXElement) range; 436 if (value != null && !model.containsID(value.getRDFId())) 437 editor.removeValueFromBean(value, domain); 438 } 439 } 440 }; 441 442 Traverser traverser = new Traverser(em, visitor); 443 traverser.traverse(bpe, model); 444 } 445 446 447 /** 448 * Unlinks <em>inverse properties</em> of the BioPAX object 449 * from values the other model does not have. 450 * @param bpe BioPAX object 451 * @param model where to look for other objects 452 */ 453 public static void fixDanglingInverseProperties(BioPAXElement bpe, Model model) 454 { 455 final Visitor visitor = new Visitor() 456 { 457 @Override 458 public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor editor) 459 { 460 BioPAXElement value = (BioPAXElement) range; 461 if (value != null && !model.containsID(value.getRDFId())) 462 editor.removeValueFromBean(domain, value); //right order! 463 } 464 }; 465 466 TraverserBilinked traverser = new TraverserBilinked(em, visitor); 467 traverser.setInverseOnly(true); 468 traverser.traverse(bpe, model); 469 } 470 471 472 // Moved from FeatureUtils; provides operations for comparing features of physical entities. 
473 474 static enum FeatureType 475 { 476 FEATURE, 477 NOT_FEATURE, 478 UNKNOWN_FEATURE; 479 } 480 481 // TODO annotate 482 public static Set<EntityFeature> getFeatureIntersection(PhysicalEntity first, FeatureType firstClass, 483 PhysicalEntity second, FeatureType secondClass) 484 { 485 Set<EntityFeature> intersection = getFeatureSetByType(first, firstClass); 486 intersection.removeAll(getFeatureSetByType(second, secondClass)); 487 return intersection; 488 } 489 490 // TODO annotate 491 public static Set<EntityFeature> getFeatureSetByType(PhysicalEntity pe, FeatureType type) 492 { 493 494 Set<EntityFeature> modifiableSet = new HashSet<EntityFeature>(); 495 496 switch (type) 497 { 498 case FEATURE: 499 modifiableSet.addAll(pe.getFeature()); 500 break; 501 case NOT_FEATURE: 502 modifiableSet.addAll(pe.getNotFeature()); 503 break; 504 case UNKNOWN_FEATURE: 505 { 506 if (pe instanceof SimplePhysicalEntity) 507 { 508 modifiableSet.addAll(((SimplePhysicalEntity) pe).getEntityReference().getEntityFeature()); 509 modifiableSet.removeAll(pe.getFeature()); 510 modifiableSet.removeAll(pe.getNotFeature()); 511 } 512 } 513 } 514 return modifiableSet; 515 } 516 517 518 /** 519 * Finds and adds all (missing) entity features 520 * to given entity reference from all its owner 521 * simple physical entities ('feature' and 'notFeature' 522 * properties). 523 * 524 * Though, it neither checks for nor resolves any violations 525 * of the 'entityFeature' property's inverse functional constraint 526 * (i.e., an EntityFeature instance can only belong to one and only one 527 * EntityReference object). 
528 * 529 * @param er entity reference object 530 * @param fix flag 531 * @return true or false 532 */ 533 public static boolean checkERFeatureSet(EntityReference er, boolean fix) 534 { 535 boolean check = true; 536 for (SimplePhysicalEntity spe : er.getEntityReferenceOf()) 537 { 538 for (EntityFeature ef : spe.getFeature()) 539 { 540 check = scanAndAddToFeatureSet(er, fix, check, ef); 541 // if not fixing return at first fail, otherwise go on; 542 if (!fix && !check) return check; 543 } 544 for (EntityFeature ef : spe.getNotFeature()) 545 { 546 check = scanAndAddToFeatureSet(er, fix, check, ef); 547 // if not fixing return at first fail, otherwise go on; 548 if (!fix && !check) return check; 549 } 550 } 551 return check; 552 } 553 554 private static boolean scanAndAddToFeatureSet(EntityReference er, boolean fix, boolean check, EntityFeature ef) 555 { 556 if (!er.getEntityFeature().contains(ef)) 557 { 558 check = false; 559 if (fix) 560 { 561 er.addEntityFeature(ef); 562 //TODO resolve inverse functional prop. constraint violation (e.g., copy/replace the e.f. before adding if it has entityFeatureOf not null)? 563 } 564 } 565 return check; 566 } 567 568 // TODO annotate 569 public static Set<EntityFeature> findFeaturesAddedToSecond(PhysicalEntity first, PhysicalEntity second, 570 boolean fix) 571 { 572 573 if (checkCommonEntityReferenceForTwoPEs(first, second, fix)) return null; 574 Set<EntityFeature> explicit = 575 getFeatureIntersection(first, FeatureType.NOT_FEATURE, second, FeatureType.FEATURE); 576 Set<EntityFeature> implicit = 577 getFeatureIntersection(first, FeatureType.UNKNOWN_FEATURE, second, FeatureType.FEATURE); 578 Set<EntityFeature> negativeImplicit = 579 getFeatureIntersection(first, FeatureType.NOT_FEATURE, second, FeatureType.UNKNOWN_FEATURE); 580 581 if (fix) 582 { 583 for (EntityFeature implied : implicit) 584 { 585 LOG.info("The feature " + implied + "implied as a not-feature of " + first + ". 
" + 586 "Adding it to the not-feature list"); 587 first.addNotFeature(implied); 588 } 589 590 for (EntityFeature implied : negativeImplicit) 591 { 592 LOG.info("The feature " + implied + "implied as a feature of " + second + ". " + 593 "Adding it to the feature list"); 594 second.addFeature(implied); 595 } 596 597 } 598 explicit.retainAll(implicit); 599 explicit.retainAll(negativeImplicit); 600 return explicit; 601 } 602 603 private static boolean checkCommonEntityReferenceForTwoPEs(PhysicalEntity first, PhysicalEntity second, 604 boolean fix) 605 { 606 if (first instanceof SimplePhysicalEntity) 607 { 608 EntityReference er = ((SimplePhysicalEntity) first).getEntityReference(); 609 if (!er.getEntityReferenceOf().contains(second)) 610 { 611 LOG.warn("These two physicalEntities do not share an EntityReference. They can not be compared! " + 612 "Skipping"); 613 return false; 614 } else if (!checkERFeatureSet(er, fix)) 615 { 616 LOG.warn("ER feature set is incomplete!"); 617 if (!fix) 618 { 619 LOG.warn("fixing..."); 620 } else 621 { 622 LOG.warn("skipping"); 623 return false; 624 } 625 } 626 return true; 627 } else 628 { 629 LOG.warn("These two physicalEntities do not share an EntityReference. They can not be compared! " + 630 "Skipping"); 631 return false; 632 } 633 634 } 635 636 637 /** 638 * Converts generic simple physical entities, 639 * i.e., physical entities except Complexes 640 * that have not empty memberPhysicalEntity property, 641 * into equivalent physical entities 642 * with generic entity references (which have members); 643 * this is a better and less error prone way to model 644 * generic molecules in BioPAX L3. 645 * 646 * Notes: 647 * Generic Complexes could be normalized in a similar way, 648 * but they do not have entityReference property and might 649 * contain generic (incl. not yet normalized) components, which 650 * makes it complicated. 
651 * 652 * Please avoid using 'memberPhysicalEntity' in your BioPAX L3 models 653 * unless absolutely sure/required, for there is an alternative way 654 * (using PhysicalEntity/entityReference/memberEntityReference), and 655 * this will probably be deprecated in the future BioPAX releases. 656 * 657 * @param model biopax model to fix 658 */ 659 public static void normalizeGenerics(Model model) 660 { 661 662 HashMap<Set<EntityReference>, EntityReference> memberMap = new HashMap<Set<EntityReference>, 663 EntityReference>(); 664 Set<SimplePhysicalEntity> pes = model.getObjects(SimplePhysicalEntity.class); 665 Set<SimplePhysicalEntity> pesToBeNormalized = new HashSet<SimplePhysicalEntity>(); 666 667 for (SimplePhysicalEntity pe : pes) 668 { 669 if (pe.getEntityReference() == null) 670 { 671 if (!pe.getMemberPhysicalEntity().isEmpty()) 672 { 673 pesToBeNormalized.add(pe); 674 } 675 } 676 } 677 678 for (SimplePhysicalEntity pe : pesToBeNormalized) 679 { 680 try 681 { 682 createNewERandAddMembers(model, pe, memberMap); 683 } 684 catch (Exception e) 685 { 686 e.printStackTrace(); 687 } 688 689 690 } 691 } 692 693 694 private static void createNewERandAddMembers(Model model, SimplePhysicalEntity pe, 695 HashMap<Set<EntityReference>, EntityReference> memberMap) 696 { 697 SimplePhysicalEntity first = (SimplePhysicalEntity) pe.getMemberPhysicalEntity().iterator().next(); 698 Set<EntityReference> members = pe.getGenericEntityReferences(); 699 EntityReference er = memberMap.get(members); 700 if (er == null) 701 { 702 EntityReference firstEntityReference = first.getEntityReference(); 703 if (firstEntityReference != null) 704 { 705 //generate a new URI in the same namespace (xml:base) 706 String syntheticId = model.getXmlBase() + md5hex(pe.getRDFId()); 707 // create and add a new EntityReference 708 er = (EntityReference) model.addNew(firstEntityReference.getModelInterface(), syntheticId); 709 // copy names and xrefs (making orig. 
unif.xrefs become relat.xrefs) 710 copySimplePointers(model, pe, er); 711 712 er.addComment("auto-generated by Paxtools from generic " 713 + pe.getModelInterface().getSimpleName() 714 + ", uri=" + pe.getRDFId() + ""); 715 716 for (EntityReference member : members) 717 { 718 er.addMemberEntityReference(member); 719 } 720 721 memberMap.put(members, er); 722 } 723 } 724 pe.setEntityReference(er); 725 } 726 727 728 /** 729 * Copies names and xrefs from source to target 730 * biopax object; it does not copy unification xrefs 731 * but instead adds relationship xrefs using the same 732 * db and id values as source's unification xrefs. 733 * 734 * @param model the biopax model where the source and target objects belong 735 * @param source from 736 * @param target to 737 */ 738 public static void copySimplePointers(Model model, Named source, Named target) 739 { 740 target.setDisplayName(source.getDisplayName()); 741 target.setStandardName(source.getStandardName()); 742 for (String name : source.getName()) 743 { 744 target.addName(name); 745 } 746 for (Xref xref : source.getXref()) 747 { 748 if ((xref instanceof UnificationXref)) 749 { 750 // generate URI using model's xml:base and xref's properties 751 String id = model.getXmlBase() + md5hex(xref.getDb()+xref.getRDFId()); 752 Xref byID = (Xref) model.getByID(id); 753 if (byID == null) 754 { 755 RelationshipXref rref = model.addNew(RelationshipXref.class, id); 756 rref.setDb(xref.getDb()); 757 rref.setId(xref.getId()); 758 rref.setDbVersion(xref.getDbVersion()); 759 rref.setIdVersion(xref.getDbVersion()); 760 xref = rref; 761 } else 762 { 763 xref = byID; 764 } 765 } 766 767 target.addXref(xref); 768 } 769 } 770 771 772 /** 773 * TODO annotate or deprecate... 
774 * 775 * @param model biopax model to edit 776 */ 777 public static void resolveFeatures(Model model) 778 { 779 if (!model.getLevel().equals(BioPAXLevel.L3)) 780 { 781 throw new UnsupportedOperationException( 782 "resolveFeatures method does not work with " + model.getLevel()); 783 } else 784 { 785 resolveBindingFeatures(model); 786 787 //For each entity reference: 788 for (EntityReference er : model.getObjects(EntityReference.class)) 789 { 790 for (SimplePhysicalEntity spe : er.getEntityReferenceOf()) 791 { 792 for (Interaction interaction : spe.getParticipantOf()) 793 { 794 //we will do this left to right 795 if (interaction instanceof Conversion) 796 { 797 Conversion cnv = (Conversion) (interaction); 798 if (cnv.getLeft().contains(spe)) 799 { 800 for (PhysicalEntity physicalEntity : cnv.getRight()) 801 { 802 if (physicalEntity instanceof SimplePhysicalEntity) 803 { 804 SimplePhysicalEntity otherSPE = (SimplePhysicalEntity) (physicalEntity); 805 if (otherSPE.getEntityReference().equals(spe.getEntityReference())) 806 { 807 Set<EntityFeature> added = 808 findFeaturesAddedToSecond(physicalEntity, otherSPE, true); 809 Set<EntityFeature> removed = 810 findFeaturesAddedToSecond(otherSPE, physicalEntity, true); 811 } 812 } 813 } 814 //TODO HANDLE complexes? 
815 } 816 } 817 } 818 } 819 } 820 } 821 } 822 823 824 private static void resolveBindingFeatures(Model model) 825 { 826 ShallowCopy copier = new ShallowCopy(BioPAXLevel.L3); 827 828 //For each Complex 829 Set<Complex> complexes = model.getObjects(Complex.class); 830 for (Complex complex : complexes) { 831 resolveBindingFeatures(model, complex, copier); 832 } 833 } 834 835 836 private static void resolveBindingFeatures(Model model, Complex complex, ShallowCopy copier) 837 { 838 Set<PhysicalEntity> components = complex.getComponent(); 839 for (PhysicalEntity component : components) 840 { 841 resolveFeaturesOfComponent(model, complex, component, copier); 842 } 843 } 844 845 private static void resolveFeaturesOfComponent(Model model, Complex complex, PhysicalEntity component, 846 ShallowCopy copier) 847 { 848 boolean connected = false; 849 Set<EntityFeature> feature = component.getFeature(); 850 for (EntityFeature ef : feature) 851 { 852 if (ef instanceof BindingFeature) 853 { 854 BindingFeature bindsTo = ((BindingFeature) ef).getBindsTo(); 855 Set<PhysicalEntity> featureOf = bindsTo.getFeatureOf(); 856 if (!SetEquivalenceChecker.hasEquivalentIntersection(complex.getComponent(), featureOf)) 857 { 858 System.err.println( 859 "The Complex" + complex.getName() + "(" + complex.getRDFId() + ") has component" + 860 component.getDisplayName() + "(" + component.getRDFId() + ") which has" + 861 "a binding feature (" + ef.getRDFId() + "), but none of the bound " + 862 "participants are in this complex"); 863 //TODO This is an error - fail. 
864 return; 865 } else 866 { 867 connected = true; 868 } 869 } 870 } 871 if (!connected) 872 { 873 Set<Interaction> participantOf = component.getParticipantOf(); 874 for (Interaction interaction : participantOf) 875 { 876 //It is ok for complex members to control a participant 877 if (!(interaction instanceof Control)) 878 { 879 component = createCopy(model, complex, component, copier); 880 break; 881 } 882 } 883 884 BindingFeature bf = model.addNew(BindingFeature.class, 885 component.getRDFId() + "bond" + "in_Complex_" + complex.getRDFId()); 886 component.addFeature(bf); 887 if (component instanceof SimplePhysicalEntity) 888 { 889 ((SimplePhysicalEntity) component).getEntityReference().addEntityFeature(bf); 890 } 891 } 892 } 893 894 private static PhysicalEntity createCopy(Model model, Complex complex, PhysicalEntity component, ShallowCopy copier) 895 { 896 //This is an aggressive fix - if a complex member is present in both an interaction that is not a control 897 // and a complex, we are creating clone, adding it a binding feature to mark it and put it into the 898 // complex and remove the old one. 899 complex.removeComponent(component); 900 component = copier.copy(model, component, component.getRDFId() + "in_Complex_" + complex.getRDFId()); 901 complex.addComponent(component); 902 return component; 903 } 904 905 906 /** 907 * This method iterates over the features in a model and tries to find equivalent objects and merges them. 
908 * @param model to be fixed 909 */ 910 public static void replaceEquivalentFeatures(Model model) 911 { 912 913 EquivalenceGrouper<EntityFeature> equivalents = new EquivalenceGrouper<EntityFeature>(); 914 HashMap<EntityFeature, EntityFeature> mapped = new HashMap<EntityFeature, EntityFeature>(); 915 HashSet<EntityFeature> scheduled = new HashSet<EntityFeature>(); 916 917 for (EntityFeature ef : model.getObjects(EntityFeature.class)) 918 { 919 if (ef.getEntityFeatureOf() == null) 920 { 921 inferEntityFromPE(ef, ef.getFeatureOf()); 922 if (ef.getEntityFeatureOf() == null) inferEntityFromPE(ef, ef.getNotFeatureOf()); 923 } 924 equivalents.add(ef); 925 } 926 for (List<EntityFeature> bucket : equivalents.getBuckets()) 927 { 928 for (int i = 1; i < bucket.size(); i++) 929 { 930 EntityFeature ef = bucket.get(i); 931 if (LOG.isWarnEnabled()) 932 { 933 LOG.warn("removing: "+ ef.getRDFId()+ " since it is equivalent to: "+ bucket.get(0)); 934 } 935 scheduled.add(ef); 936 } 937 } 938 for (EntityFeature entityFeature : scheduled) 939 { 940 model.remove(entityFeature); 941 } 942 for (PhysicalEntity physicalEntity : model.getObjects(PhysicalEntity.class)) 943 { 944 Set<EntityFeature> features = new HashSet<EntityFeature>(physicalEntity.getFeature()); 945 for (EntityFeature feature : features) 946 { 947 EntityFeature that = mapped.get(feature); 948 if (that != null && !that.equals(feature)) 949 { 950 LOG.debug(" replacing " + feature + 951 "{" + feature.getRDFId() + "} with " + 952 that + "{" + that.getRDFId() + "}"); 953 physicalEntity.removeFeature(feature); 954 physicalEntity.addFeature(that); 955 } 956 } 957 } 958 } 959 960 961 private static void inferEntityFromPE(EntityFeature ef, Set<PhysicalEntity> pes) 962 { 963 964 for (PhysicalEntity physicalEntity : pes) 965 { 966 if (physicalEntity instanceof SimplePhysicalEntity) 967 { 968 EntityReference er = ((SimplePhysicalEntity) physicalEntity).getEntityReference(); 969 if (er != null) 970 { 971 er.addEntityFeature(ef); 972 
LOG.debug("Inferred the ER of " + ef.getRDFId() + " as " + er.getRDFId()); 973 return; 974 } 975 } 976 } 977 } 978 979 980 /** 981 * Collects data type (not object) property 982 * values (can be then used for full-text indexing). 983 * 984 * @param biopaxElement biopax object 985 * @param depth greater or equals 0: 0 means use this object's 986 * data properties only; 1 - add child's data properties, etc.; 987 * (the meaning is slightly different from that of Fetcher.fetch(..) method) 988 * @param dataPropertyFilters - biopax data property filters to optionally 989 * either skip e.g. properties 'sequence', 'temperature', 990 * or only accept 'term', 'comment', 'name', etc. 991 * @return set of keywords 992 */ 993 public static Set<String> getKeywords(BioPAXElement biopaxElement, int depth, 994 Filter<DataPropertyEditor>... dataPropertyFilters) 995 { 996 LOG.debug("getKeywords called: " + biopaxElement.getRDFId()); 997 998 EditorMap em = SimpleEditorMap.L3; 999 Set<String> ss = new HashSet<String>(); 1000 1001 //if depth>0, fetch child biopax objects (ignoring PathwayStep.nextStep property) 1002 Set<BioPAXElement> elms = (depth > 0) 1003 ? new Fetcher(em, Fetcher.nextStepFilter).fetch(biopaxElement, depth) 1004 : new HashSet<BioPAXElement>(); 1005 1006 //add this one 1007 elms.add(biopaxElement); 1008 1009 for (BioPAXElement bpe : elms) { 1010 Set<PropertyEditor> props = em.getEditorsOf(bpe); 1011 for (PropertyEditor pe : props) { 1012 //skip for object prop. or one that fails to pass a filter 1013 if (pe instanceof ObjectPropertyEditor 1014 || !filter((DataPropertyEditor)pe, dataPropertyFilters)) 1015 continue; 1016 1017 Set values = pe.getValueFromBean(bpe); 1018 for (Object v : values) { 1019 if (!pe.isUnknown(v)) { 1020 ss.add(v.toString()); 1021 } 1022 } 1023 } 1024 } 1025 1026 return ss; 1027 } 1028 1029 1030 private static <T extends PropertyEditor> boolean filter(T pe, Filter<T>... 
propertyFilters) { 1031 if(propertyFilters.length==0) 1032 return true; 1033 1034 for(Filter<T> pf : propertyFilters) { 1035 if (!pf.filter(pe)) { 1036 return false; 1037 } 1038 } 1039 1040 return true; 1041 } 1042 1043 1044 /** 1045 * Collects BioSource objects from this or 1046 * related elements (where it makes sense; 1047 * though the biopax element might have no 1048 * or empty 'organism' property at all. 1049 * 1050 * The idea is to additionally associate with 1051 * existing BioSource objects, and thus make 1052 * filtering by organism possible, for at least 1053 * Interaction, Protein, Complex, Dna, etc. 1054 * biopax entities. 1055 * 1056 * 1057 * @param biopaxElement biopax object 1058 * @return organism names 1059 */ 1060 public static Set<BioSource> getOrganisms(BioPAXElement biopaxElement) { 1061 final Set<BioSource> biosources = new HashSet<BioSource>(); 1062 //shortcut 1063 if(biopaxElement == null) 1064 return biosources; 1065 1066 LOG.debug("getOrganisms called: " + biopaxElement.getRDFId()); 1067 1068 if(biopaxElement instanceof BioSource) { 1069 biosources.add((BioSource) biopaxElement); 1070 } else if (biopaxElement instanceof Pathway) { 1071 if(((Pathway)biopaxElement).getOrganism() != null) 1072 biosources.add(((Pathway)biopaxElement).getOrganism()); 1073// else 1074// //if not set, - infer from children (expensive) 1075// biosources.addAll((new Fetcher(em, Fetcher.nextStepFilter)) 1076// .fetch(biopaxElement, BioSource.class)); 1077 1078 } else if (biopaxElement instanceof Gene) { 1079 if(((Gene)biopaxElement).getOrganism() != null) 1080 biosources.add(((Gene) biopaxElement).getOrganism()); 1081 } else if (biopaxElement instanceof PathwayStep) { 1082 Pathway pw = ((PathwayStep) biopaxElement).getPathwayOrderOf(); 1083 if(pw != null && pw.getOrganism() != null) 1084 biosources.add(pw.getOrganism()); 1085 } else if (biopaxElement instanceof Interaction 1086 || biopaxElement instanceof EntityReference 1087 || biopaxElement instanceof 
PhysicalEntity) { 1088 1089 if (biopaxElement instanceof SequenceEntityReference) { 1090 if(((SequenceEntityReference) biopaxElement).getOrganism() != null) 1091 biosources.add(((SequenceEntityReference) biopaxElement).getOrganism()); 1092 } 1093 1094 //get from children (members, participants, components, etc.) 1095 biosources.addAll((new Fetcher(em, Fetcher.nextStepFilter)) 1096 .fetch(biopaxElement, BioSource.class)); 1097 } 1098 1099 return biosources; 1100 } 1101 1102 1103 /** 1104 * Collects all Provenance objects 1105 * associated with this one as follows: 1106 * - if the element is Entity (has 'dataSource' property) 1107 * or is Provenence itself, get the values and quit; 1108 * - if the biopax element is PathwayStep or EntityReference, 1109 * traverse into some of its object/inverse properties to collect 1110 * dataSource values from associated entities. 1111 * - return empty set for all other BioPAX types (it is less important 1112 * to associate common self-descriptive biopax utility classes with 1113 * particular pathway data sources) 1114 * 1115 * @param biopaxElement a biopax object 1116 * @return Provenance objects set 1117 */ 1118 public static Set<Provenance> getDatasources(BioPAXElement biopaxElement) { 1119 1120 final Set<Provenance> datasources = new HashSet<Provenance>(); 1121 1122 //shortcut 1123 if(biopaxElement == null) 1124 return datasources; 1125 1126 LOG.debug("getDatasources called: " + biopaxElement.getRDFId()); 1127 1128 if (biopaxElement instanceof Provenance) { 1129 datasources.add((Provenance) biopaxElement); 1130 } else if (biopaxElement instanceof Entity) { 1131 datasources.addAll(((Entity) biopaxElement).getDataSource()); 1132 } else if (biopaxElement instanceof EntityReference) { 1133 // Let ERs inherit its dataSource from parent PEs or ERs: 1134 for(SimplePhysicalEntity spe : ((EntityReference) biopaxElement).getEntityReferenceOf()) 1135 datasources.addAll(getDatasources(spe)); 1136 for(EntityReference er : ((EntityReference) 
biopaxElement).getMemberEntityReferenceOf()) 1137 datasources.addAll(getDatasources(er)); 1138 } else if (biopaxElement instanceof PathwayStep) { 1139 datasources.addAll(getDatasources(((PathwayStep) biopaxElement).getPathwayOrderOf())); 1140 } else { 1141 // ignore 1142 } 1143 1144 return datasources; 1145 } 1146 1147 1148 /** 1149 * Collects all parent Pathway objects recursively 1150 * traversing the inverse object properties of the 1151 * biopax element. It ignores all BioPAX types except (incl. sub-classes of): 1152 * Pathway, Interaction, PathwayStep, PhysicalEntity, EntityReference, and Gene. 1153 * 1154 * @param biopaxElement biopax object 1155 * @return inferred parent pathways 1156 */ 1157 public static Set<Pathway> getParentPathways(BioPAXElement biopaxElement) { 1158 final Set<BioPAXElement> visited = new HashSet<BioPAXElement>(); 1159 return getParentPathwaysRecursively(biopaxElement, visited); 1160 } 1161 1162 // recursively finds all the parent pathways of the object, while escaping infinite loops 1163 private static Set<Pathway> getParentPathwaysRecursively( 1164 final BioPAXElement biopaxElement, final Set<BioPAXElement> visited) { 1165 1166 final Set<Pathway> pathways = new HashSet<Pathway>(); 1167 1168 //shortcut, when bpe is null or already processed 1169 if(biopaxElement == null || !visited.add(biopaxElement)) { 1170 LOG.info("Ignored null or previously visited object:" + biopaxElement); 1171 return pathways; 1172 } 1173 1174 LOG.debug("getParentPathways called: " + biopaxElement.getRDFId()); 1175 1176 if(biopaxElement instanceof Process) { 1177 if(biopaxElement instanceof Pathway) // add itself 1178 pathways.add((Pathway) biopaxElement); 1179 // continue looking up to parent pathways (until all top ones reached) 1180 for(Pathway pw : ((Process)biopaxElement).getPathwayComponentOf()) 1181 pathways.addAll(getParentPathwaysRecursively(pw, visited)); //TODO bug: in PC2v8 (thanks to kegg), inf. 
loop here (StackOverFlow) 1182 for(Interaction it : ((Process)biopaxElement).getParticipantOf()) 1183 pathways.addAll(getParentPathwaysRecursively(it, visited)); 1184 for(PathwayStep pt : ((Process)biopaxElement).getStepProcessOf()) 1185 pathways.addAll(getParentPathwaysRecursively(pt, visited)); 1186 } else if(biopaxElement instanceof PathwayStep) { 1187 pathways.addAll(getParentPathwaysRecursively(((PathwayStep) biopaxElement).getPathwayOrderOf(), visited)); 1188 } else if(biopaxElement instanceof PhysicalEntity ) { 1189 for(PhysicalEntity pe : ((PhysicalEntity)biopaxElement).getMemberPhysicalEntityOf()) 1190 pathways.addAll(getParentPathwaysRecursively(pe, visited)); 1191 for(Interaction it : ((Entity)biopaxElement).getParticipantOf()) 1192 pathways.addAll(getParentPathwaysRecursively(it, visited)); 1193 for(Complex c : ((PhysicalEntity)biopaxElement).getComponentOf()) 1194 pathways.addAll(getParentPathwaysRecursively(c, visited)); 1195 } else if(biopaxElement instanceof EntityReference) { 1196 for(EntityReference er : ((EntityReference) biopaxElement).getMemberEntityReferenceOf()) 1197 pathways.addAll(getParentPathwaysRecursively(er, visited)); 1198 for(SimplePhysicalEntity spe : ((EntityReference) biopaxElement).getEntityReferenceOf()) 1199 pathways.addAll(getParentPathwaysRecursively(spe, visited)); 1200 } else if (biopaxElement instanceof Gene ) { 1201 for(Interaction it : ((Entity) biopaxElement).getParticipantOf()) 1202 pathways.addAll(getParentPathwaysRecursively(it, visited)); 1203 } else { 1204 // ignore 1205 } 1206 1207 return pathways; 1208 } 1209 1210 1211 /** 1212 * Given BioPAX model, for each BioPAX object of the listed classes and their sub-classes, 1213 * such as e.g. Entity and EntityReference (if no types are provided - for all objects in the model), 1214 * creates (parent) an annotation map entry with key: "pathway", value: a set of URIs of parent pathways. 
1215 * 1216 * @param model BioPAX model 1217 * @param directParentsOnly use only direct or nearest parent (sub-)pathways of a BioPAX object in the annotation 1218 * @param types optional list of BioPAX types to annotate); default: BioPAXElement.class (i.e., everything) 1219 */ 1220 public static void addPathwayAnnotations( 1221 Model model, boolean directParentsOnly, Class<? extends BioPAXElement>... types) { 1222 //TODO implement addPathwayAnnotations 1223 } 1224 1225 1226 /** 1227 * Merges equivalent interactions. 1228 * 1229 * Note (warning): please check if the result is desirable; 1230 * the result of the merging very much depends on actual pathway data quality... 1231 * 1232 * @param model to edit/update 1233 */ 1234 public static void mergeEquivalentInteractions(Model model) 1235 { 1236 EquivalenceGrouper<Conversion> groups = new EquivalenceGrouper(model.getObjects(Conversion.class)); 1237 1238 for (List<Conversion> group : groups.getBuckets()) 1239 { 1240 if (group.size() > 1) 1241 { 1242 HashSet<Conversion> tobeRemoved = new HashSet<Conversion>(); 1243 Interaction primus = null; 1244 for (Conversion conversion : group) 1245 { 1246 if (primus == null) 1247 { 1248 primus = conversion; 1249 } else 1250 { 1251 copySimplePointers(model, conversion, primus); 1252 Set<Control> controlledOf = conversion.getControlledOf(); 1253 for (Control control : controlledOf) 1254 { 1255 if (!control.getControlled().contains(primus)) 1256 { 1257 control.addControlled(primus); 1258 } 1259 } 1260 Set<Pathway> owners = conversion.getPathwayComponentOf(); 1261 for (Pathway pathway : owners) 1262 { 1263 if(!pathway.getPathwayComponent().contains(primus)) 1264 { 1265 pathway.addPathwayComponent(primus); 1266 } 1267 1268 } 1269 tobeRemoved.add(conversion); 1270 } 1271 } 1272 for (Conversion conversion : tobeRemoved) 1273 { 1274 cleanAllInverse(conversion); 1275 model.remove(conversion); 1276 } 1277 } 1278 } 1279 } 1280 1281 private static void cleanAllInverse(Conversion conversion) 
1282 { 1283 Set<PhysicalEntity> concSafe = new HashSet<PhysicalEntity>(conversion.getLeft()); 1284 for (PhysicalEntity pe : concSafe) 1285 { 1286 conversion.removeLeft(pe); 1287 } 1288 concSafe = new HashSet<PhysicalEntity>(conversion.getRight()); 1289 for (PhysicalEntity pe : concSafe) 1290 { 1291 conversion.removeRight(pe); 1292 } 1293 Set<Control> controlledOf = new HashSet<Control>(conversion.getControlledOf()); 1294 for (Control control : controlledOf) 1295 { 1296 control.removeControlled(conversion); 1297 } 1298 Set<Pathway> owners = new HashSet<Pathway>(conversion.getPathwayComponentOf()); 1299 for (Pathway pathway : owners) 1300 { 1301 pathway.removePathwayComponent(conversion); 1302 } 1303 } 1304 1305} 1306 1307 1308