001package org.biopax.paxtools.controller;
002
003import org.apache.commons.logging.Log;
004import org.apache.commons.logging.LogFactory;
005import org.biopax.paxtools.io.BioPAXIOHandler;
006import org.biopax.paxtools.io.SimpleIOHandler;
007import org.biopax.paxtools.model.BioPAXElement;
008import org.biopax.paxtools.model.BioPAXFactory;
009import org.biopax.paxtools.model.BioPAXLevel;
010import org.biopax.paxtools.model.Model;
011import org.biopax.paxtools.model.level3.*;
012import org.biopax.paxtools.model.level3.Process;
013import org.biopax.paxtools.util.*;
014
015import java.io.ByteArrayInputStream;
016import java.io.ByteArrayOutputStream;
017import java.security.MessageDigest;
018import java.security.NoSuchAlgorithmException;
019import java.util.*;
020import java.util.concurrent.ExecutorService;
021import java.util.concurrent.Executors;
022import java.util.concurrent.TimeUnit;
023
024/**
025 * Several useful algorithms and examples, e.g., to extract root or child
026 * BioPAX L3 elements, remove dangling, replace elements
027 * or URIs, fix/infer property values, etc.
028 * 
 * NOTE: although this is a public class with public methods,
 * it can be (and already has been) modified, sometimes considerably,
 * in any minor revision; it was not designed to be part of the Paxtools public API.
 * So, we encourage users to copy the methods they need to their own apps rather than
 * depend on this unstable utility class in the long term.
034 * 
035 * @author rodche, Arman, Emek
036 */
037public final class ModelUtils
038{
039        private static final Log LOG = LogFactory.getLog(ModelUtils.class);
040
        /**
         * Package-private constructor (declared without an access modifier);
         * it never completes normally, so this class cannot be instantiated.
         * 
         * @throws AssertionError always (i.e, if called via java reflection)
         */
        ModelUtils() {
                throw new AssertionError("Not instantiable");
        }
049
050
051        static final MessageDigest MD5_DIGEST; 
052
053        /**
054         * Initializer.
055         */
056        static {
057                try {
058                        MD5_DIGEST = MessageDigest.getInstance("MD5");
059                } catch (NoSuchAlgorithmException e) {
060                        throw new RuntimeException("Cannot instantiate MD5 MessageDigest!", e);
061                }
062        }
063
064        private final static BioPAXFactory factory = BioPAXLevel.L3.getDefaultFactory();
065
066        private final static EditorMap em = SimpleEditorMap.L3;
067
068        private final static BioPAXIOHandler io = new SimpleIOHandler(BioPAXLevel.L3);
069
070
071        static
072        {
073                ((SimpleIOHandler) io).mergeDuplicates(true);
074                ((SimpleIOHandler) io).normalizeNameSpaces(false);
075        }
076
077
078        /**
079         * Replaces BioPAX elements in the model with ones from the map,
080         * updates corresponding BioPAX object references.
081         * 
082         * It does not neither remove the old nor add new elements in the model
083         * (if required, one can do this before/after this method, e.g., using
084         * the same 'subs' map)
085         * 
086         * This does visit all object properties of each "explicit" element
087         * in the model, but does not traverse deeper into one's sub-properties
088         * to replace something there as well (e.g., nested member entity references
089         * are not replaced unless parent entity reference present in the model)
090         * 
091         * This does not automatically move/migrate old (replaced) object's
092         * children to new objects (the replacement ones are supposed to have
093         * their own properties already set or to be set shortly; otherwise,
094         * consider using of something like {@link #fixDanglingInverseProperties(BioPAXElement, Model)} after.
095         * 
096         * @param model biopax model where the objects are to be replaced
097         * @param subs the replacements map (many-to-one, old-to-new)
098         * @exception IllegalBioPAXArgumentException if there is an incompatible type replacement object
099         */
100        public static void replace(Model model, final Map<? extends BioPAXElement, ? extends BioPAXElement> subs)
101        {
102                // update properties
103                Visitor visitor = new Visitor()
104                {
105                        @Override
106                        public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor editor)
107                        {
108                                if (editor instanceof ObjectPropertyEditor && range != null && subs.containsKey(range))
109                                {
110                                        BioPAXElement value = (BioPAXElement) range;
111                                        // 'value' is to be replaced with the 'replacement'
112                                        BioPAXElement replacement = subs.get(range); //can get null (ok)
113                                        
114                                        // normal biopax property -
115                                        if (replacement != null && !editor.getRange().isInstance(replacement))
116                                        {
117                                                throw new IllegalBioPAXArgumentException(
118                                                        "Incompatible type! Attempted to replace " 
119                                                        + value.getRDFId() + " (" + value.getModelInterface().getSimpleName() 
120                                                        + ") with " + replacement.getRDFId() + " (" 
121                                                        + replacement.getModelInterface().getSimpleName() + "); "
122                                                        + "property: " + editor.getProperty() 
123                                                        + " of bean: " + domain.getRDFId() + " ("  
124                                                        + domain.getModelInterface().getSimpleName() + ")");
125                                        }
126
127                                        if (replacement != value) 
128                                        {
129                                                editor.removeValueFromBean(value, domain);
130                                                editor.setValueToBean(replacement, domain);
131                                        } else {
132                                                LOG.debug("replace: skipped the identical: " + replacement.getRDFId());
133                                        }
134                                }
135                        }
136                };
137
138                Traverser traverser = new Traverser(em, visitor);
139                for (BioPAXElement bpe : new HashSet<BioPAXElement>(model.getObjects()))
140                {
141                        // update object properties and clear inverse properties using 'subs' map       
142                        traverser.traverse(bpe, null); //model is not needed
143                }
144        }
145
146
147        /**
148         * Finds "root" BioPAX objects that belong to a particular class (incl. sub-classes)
149         * in the model.
150         * 
151         * Note: however, such "root" elements may or may not be, a property of other
152         * elements, not included in the model.
153         * @param model biopax model to work with
154         * @param filterClass filter class (including subclasses)
155         * @param <T> biopax type
156         * @return set of the root biopax objects of given type
157         */
158        public static <T extends BioPAXElement> Set<T> getRootElements(final Model model, final Class<T> filterClass)
159        {
160                // copy all such elements (initially, we think all are roots...)
161                final Set<T> result = new HashSet<T>(model.getObjects(filterClass));
162
163                //"shallow" traverser (direct object properties only - Visitor.visit does not call traverse again) 
164                @SuppressWarnings("unchecked")
165                Traverser traverser = new Traverser(em, 
166                        new Visitor() {
167                                @Override
168                                public void visit(BioPAXElement parent, Object value, Model model,
169                                        PropertyEditor<?, ?> editor)
170                                {
171                                        if (filterClass.isInstance(value)) result.remove(value);
172                                }
173                        }, 
174                        new Filter<PropertyEditor>() {
175                                @Override
176                                public boolean filter(PropertyEditor pe) {
177                                        return (pe instanceof ObjectPropertyEditor);
178                                }
179                });
180                
181                // but we run from every element (all types)
182                for(BioPAXElement e : model.getObjects())
183                        traverser.traverse(e, null);
184
185                return result;
186        }
187
188        
189        /**
190         * Iteratively removes "dangling" elements of given type and its sub-types,
191         * e.g. Xref.class objects, from the BioPAX model. 
192         * 
193         * If the "model" does not contain any root Entity class objects,
194         * and the second parameter is basic UtilityClass.class (i.e., not its sub-class), 
195         * then it simply logs a warning and quits shortly (otherwise, it would 
196         * remove everything from the model). Do not use basic Entity.class either
197         * (but a sub-class is OK) for the same reason (it would delete everything).
198         * 
199         * This, however, does not change relationships
200         * among objects, particularly, some inverse properties,
201         * such as entityReferenceOf or xrefOf, may still
202         * refer to a removed object.
203         * @param model to modify
204         * @param clazz filter-class (filter by this type and sub-classes)
205         * @param <T> biopax type
206         * @return removed objects
207         */
208        public static <T extends BioPAXElement> Set<BioPAXElement> removeObjectsIfDangling(Model model, Class<T> clazz)
209        {
210                final Set<BioPAXElement> removed = new HashSet<BioPAXElement>();
211                
212                // 'equals' below is used intentionally (isAssignableFrom() would be incorrect)
213                if(Entity.class.equals(clazz)) {
214                        LOG.warn("Ignored removeObjectsIfDangling call for: " +
215                                        "Entity.class (it would delete all)");
216                        return removed;
217                }
218                if(UtilityClass.class.equals(clazz) 
219                                && getRootElements(model, Entity.class).isEmpty()) 
220                {
221                        LOG.warn("Ignored removeObjectsIfDangling call: " +
222                                        "no root entities model; UtilityClass.class");
223                        return removed;
224                }
225                
226                Set<T> dangling = getRootElements(model, clazz);        
227                
228                // get rid of dangling objects
229                if (!dangling.isEmpty())
230                {
231                        LOG.info(dangling.size() + " " + clazz.getSimpleName() +
232                                " dangling objects will be deleted...");
233
234                        for (BioPAXElement thing : dangling)
235                        {
236                                model.remove(thing);
237                                removed.add(thing);
238                                LOG.debug("removed (dangling) " + thing.getRDFId() + " (" 
239                                        + thing.getModelInterface().getSimpleName() + ") " + thing);
240                        }
241
242                        // some may have become dangling now, so check again...
243                        removed.addAll(removeObjectsIfDangling(model, clazz));
244                }
245                
246                return removed;
247        }
248
249
250        /**
251         * Cuts the BioPAX model off other models and BioPAX objects
252         * by essentially performing write/read to/from OWL.
253         * The resulting model contains new objects with same IDs
254         * and have object properties "fixed", i.e., dangling values
255         * become null/empty, and inverse properties (e.g. xrefOf)
256         * re-calculated. The original model is unchanged.
257         * 
258         * Note: this method will fail for very large models 
259         * (if resulting RDF/XML utf8 string is longer than approx. 1Gb)
260         * 
261         * @param model biopax model to process
262         * @return copy of the model
263         */
264        public static Model writeRead(Model model)
265        {
266                BioPAXIOHandler io = new SimpleIOHandler(model.getLevel());
267                ByteArrayOutputStream baos = new ByteArrayOutputStream();
268                io.convertToOWL(model, baos);
269                return io.convertFromOWL(new ByteArrayInputStream(baos.toByteArray()));
270        }
271
272
273        /**
274         * Gets direct children of a given BioPAX element
275         * and adds them to a new model.
276         * @param bpe biopax element/object
277         * @return new model
278         */
279        public static Model getDirectChildren(BioPAXElement bpe)
280        {
281                Model m = factory.createModel();
282
283                Traverser traverser = new Traverser(em, new Visitor() {
284                        @Override
285                        public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor<?,?> editor)
286                        {
287                                if (range instanceof BioPAXElement && !model.containsID(((BioPAXElement) range).getRDFId()))
288                                        model.add((BioPAXElement) range);
289                        }
290                });
291
292                traverser.traverse(bpe, m);
293
294                return m;
295        }
296
297
298        /**
299         * Gets all the child BioPAX elements of a given BioPAX element
300         * (using the "tuned" {@link Fetcher}) and adds them to a
301         * new model.
302         * @param bpe biopax object
303         * @param filters property filters (e.g., for Fetcher to skip some properties). Default is to skip 'nextStep'.
304         * @return new biopax Model that contain all the child objects
305         */
306        public static Model getAllChildren(BioPAXElement bpe, 
307                @SuppressWarnings("rawtypes") Filter<PropertyEditor>... filters)
308        {
309                Model m = factory.createModel();
310                if (filters.length == 0)
311                {
312                        new Fetcher(em, Fetcher.nextStepFilter).fetch(bpe, m);
313                } else
314                {
315                        new Fetcher(em, filters).fetch(bpe, m);
316                }
317                m.remove(bpe); // remove the parent
318
319                return m;
320        }
321
322        /**
323         * Collects direct children of a given BioPAX element.
324         * @param bpe biopax object (parent)
325         * @return set of child biopax objects
326         */
327        public static Set<BioPAXElement> getDirectChildrenAsSet(BioPAXElement bpe)
328        {
329                final Set<BioPAXElement> toReturn = new HashSet<BioPAXElement>();
330
331                Traverser traverser = new Traverser(em, new Visitor() {
332                                @Override
333                                public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor<?, ?> editor) {
334                                        if (range instanceof BioPAXElement) {
335                                                toReturn.add((BioPAXElement) range);
336                                        }
337                                }
338                        }
339                );
340
341                traverser.traverse(bpe, null);
342
343                return toReturn;
344        }
345        
346        
347        /**
348         * Generates simple counts of different elements in the model.
349         * 
350         * @param model biopax model to analyze
351         * @return a biopax types - to counts of objects of each type map
352         */
353        public static Map<Class<? extends BioPAXElement>, Integer> generateClassMetrics(Model model)
354        {
355                Map<Class<? extends BioPAXElement>, Integer> metrics = new HashMap<Class<? extends BioPAXElement>, Integer>();
356                for (BioPAXElement bpe : model.getObjects())
357                {
358                        Integer count = metrics.get(bpe.getModelInterface());
359                        if (count == null)
360                        {
361                                count = 1;
362                        } else
363                        {
364                                count = count + 1;
365                        }
366                        metrics.put(bpe.getModelInterface(), count);
367                }
368                return metrics;
369        }
370
371
372        /**
373         * A more strict, type-safe way to ask for a biopax object
374         * from the model, unlike {@link Model#getByID(String)}.
375         * 
376         * @param model biopax model to query
377         * @param uri absolute URI of a biopax element
378         * @param clazz class-filter (to filter by the biopax type and its sub-types)
379         * @param <T> biopax type
380         * @return the biopax object or null (if no such element, or element with this URI is of incompatible type)
381         */
382        public static <T extends BioPAXElement> T getObject(Model model, String uri, Class<T> clazz)
383        {
384                BioPAXElement bpe = model.getByID(uri);
385                if (clazz.isInstance(bpe))
386                {
387                        return (T) bpe;
388                } else
389                {
390                        return null;
391                }
392        }
393
394
395        /**
396         * Calculates MD5 hash code (as 32-byte hex. string).
397         * 
398         * This method is not BioPAX specific. Can be
399         * used for many purposes, such as generating 
400         * new unique URIs, database primary keys, etc.
401         * 
402         * 
403         * @param id some identifier, e.g., URI
404         * @return the 32-byte digest string
405         */
406        public static String md5hex(String id)
407        {
408                byte[] digest = MD5_DIGEST.digest(id.getBytes());
409                StringBuffer sb = new StringBuffer();
410                for (byte b : digest)
411                {
412                        sb.append(Integer.toHexString((int) (b & 0xff) | 0x100).substring(1, 3));
413                }
414                String hex = sb.toString();
415                return hex;
416        }
417
418
419        /**
420         * Unlinks <em>object properties</em> of the BioPAX object
421         * from values the model does not have.
422         * 
423         * @param bpe a biopax object
424         * @param model the model to look for objects in
425         */
426        public static void fixDanglingObjectProperties(BioPAXElement bpe, Model model)
427        {
428                final Visitor visitor = new Visitor()
429                {
430                        @Override
431                        public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor editor)
432                        {
433                                if (editor instanceof ObjectPropertyEditor)
434                                {
435                                        BioPAXElement value = (BioPAXElement) range;
436                                        if (value != null && !model.containsID(value.getRDFId())) 
437                                                editor.removeValueFromBean(value, domain);
438                                }
439                        }
440                };
441
442                Traverser traverser = new Traverser(em, visitor);
443                traverser.traverse(bpe, model);
444        }
445
446
447        /**
448         * Unlinks <em>inverse properties</em> of the BioPAX object
449         * from values the other model does not have.
450         * @param bpe BioPAX object
451         * @param model where to look for other objects
452         */
453        public static void fixDanglingInverseProperties(BioPAXElement bpe, Model model)
454        {
455                final Visitor visitor = new Visitor()
456                {
457                        @Override
458                        public void visit(BioPAXElement domain, Object range, Model model, PropertyEditor editor)
459                        {
460                                BioPAXElement value = (BioPAXElement) range;
461                                if (value != null && !model.containsID(value.getRDFId()))
462                                        editor.removeValueFromBean(domain, value); //right order!
463                        }
464                };
465
466                TraverserBilinked traverser = new TraverserBilinked(em, visitor);
467                traverser.setInverseOnly(true);
468                traverser.traverse(bpe, model);
469        }
470
471
472        // Moved from FeatureUtils; provides operations for comparing features of physical entities.
473
        /**
         * Selects which set of entity features of a physical entity is meant;
         * see {@link #getFeatureSetByType(PhysicalEntity, FeatureType)}.
         */
        static enum FeatureType
        {
                /** Values of the 'feature' property of the physical entity. */
                FEATURE,
                /** Values of the 'notFeature' property of the physical entity. */
                NOT_FEATURE,
                /**
                 * For a simple physical entity: entity features of its entity reference
                 * that are listed in neither 'feature' nor 'notFeature' of the entity.
                 */
                UNKNOWN_FEATURE;
        }
480
481        // TODO annotate
482        public static Set<EntityFeature> getFeatureIntersection(PhysicalEntity first, FeatureType firstClass,
483                        PhysicalEntity second, FeatureType secondClass)
484        {
485                Set<EntityFeature> intersection = getFeatureSetByType(first, firstClass);
486                intersection.removeAll(getFeatureSetByType(second, secondClass));
487                return intersection;
488        }
489
490        // TODO annotate
491        public static Set<EntityFeature> getFeatureSetByType(PhysicalEntity pe, FeatureType type)
492        {
493
494                Set<EntityFeature> modifiableSet = new HashSet<EntityFeature>();
495
496                switch (type)
497                {
498                        case FEATURE:
499                                modifiableSet.addAll(pe.getFeature());
500                                break;
501                        case NOT_FEATURE:
502                                modifiableSet.addAll(pe.getNotFeature());
503                                break;
504                        case UNKNOWN_FEATURE:
505                        {
506                                if (pe instanceof SimplePhysicalEntity)
507                                {
508                                        modifiableSet.addAll(((SimplePhysicalEntity) pe).getEntityReference().getEntityFeature());
509                                        modifiableSet.removeAll(pe.getFeature());
510                                        modifiableSet.removeAll(pe.getNotFeature());
511                                }
512                        }
513                }
514                return modifiableSet;
515        }
516
517
518        /**
519         * Finds and adds all (missing) entity features 
520         * to given entity reference from all its owner 
521         * simple physical entities ('feature' and 'notFeature' 
522         * properties).
523         * 
524         * Though, it neither checks for nor resolves any violations 
525         * of the 'entityFeature' property's inverse functional constraint
526         * (i.e., an EntityFeature instance can only belong to one and only one
527         * EntityReference object).  
528         * 
529         * @param er entity reference object
530         * @param fix flag
531         * @return true or false
532         */
533        public static boolean checkERFeatureSet(EntityReference er, boolean fix)
534        {
535                boolean check = true;
536                for (SimplePhysicalEntity spe : er.getEntityReferenceOf())
537                {
538                        for (EntityFeature ef : spe.getFeature())
539                        {
540                                check = scanAndAddToFeatureSet(er, fix, check, ef);
541                                // if not fixing return at first fail, otherwise go on;
542                                if (!fix && !check) return check;
543                        }
544                        for (EntityFeature ef : spe.getNotFeature())
545                        {
546                                check = scanAndAddToFeatureSet(er, fix, check, ef);
547                                // if not fixing return at first fail, otherwise go on;
548                                if (!fix && !check) return check;
549                        }
550                }
551                return check;
552        }
553
554        private static boolean scanAndAddToFeatureSet(EntityReference er, boolean fix, boolean check, EntityFeature ef)
555        {
556                if (!er.getEntityFeature().contains(ef))
557                {
558                        check = false;
559                        if (fix)
560                        {
561                                er.addEntityFeature(ef);
562                                //TODO resolve inverse functional prop. constraint violation (e.g., copy/replace the e.f. before adding if it has entityFeatureOf not null)?
563                        }
564                }
565                return check;
566        }
567
568        // TODO annotate
569        public static Set<EntityFeature> findFeaturesAddedToSecond(PhysicalEntity first, PhysicalEntity second,
570                        boolean fix)
571        {
572
573                if (checkCommonEntityReferenceForTwoPEs(first, second, fix)) return null;
574                Set<EntityFeature> explicit =
575                                getFeatureIntersection(first, FeatureType.NOT_FEATURE, second, FeatureType.FEATURE);
576                Set<EntityFeature> implicit =
577                                getFeatureIntersection(first, FeatureType.UNKNOWN_FEATURE, second, FeatureType.FEATURE);
578                Set<EntityFeature> negativeImplicit =
579                                getFeatureIntersection(first, FeatureType.NOT_FEATURE, second, FeatureType.UNKNOWN_FEATURE);
580
581                if (fix)
582                {
583                        for (EntityFeature implied : implicit)
584                        {
585                                LOG.info("The feature " + implied + "implied as a not-feature of " + first + ". " +
586                                         "Adding it to the not-feature list");
587                                first.addNotFeature(implied);
588                        }
589
590                        for (EntityFeature implied : negativeImplicit)
591                        {
592                                LOG.info("The feature " + implied + "implied as a feature of " + second + ". " +
593                                         "Adding it to the feature list");
594                                second.addFeature(implied);
595                        }
596
597                }
598                explicit.retainAll(implicit);
599                explicit.retainAll(negativeImplicit);
600                return explicit;
601        }
602        
603        private static boolean checkCommonEntityReferenceForTwoPEs(PhysicalEntity first, PhysicalEntity second,
604                        boolean fix)
605        {
606                if (first instanceof SimplePhysicalEntity)
607                {
608                        EntityReference er = ((SimplePhysicalEntity) first).getEntityReference();
609                        if (!er.getEntityReferenceOf().contains(second))
610                        {
611                                LOG.warn("These two physicalEntities do not share an EntityReference. They can not be compared! " +
612                                         "Skipping");
613                                return false;
614                        } else if (!checkERFeatureSet(er, fix))
615                        {
616                                LOG.warn("ER feature set is incomplete!");
617                                if (!fix)
618                                {
619                                        LOG.warn("fixing...");
620                                } else
621                                {
622                                        LOG.warn("skipping");
623                                        return false;
624                                }
625                        }
626                        return true;
627                } else
628                {
629                        LOG.warn("These two physicalEntities do not share an EntityReference. They can not be compared! " +
630                                        "Skipping");
631                        return false;
632                }
633
634        }
635
636
637        /**
638         * Converts generic simple physical entities, 
639         * i.e., physical entities except Complexes 
640         * that have not empty memberPhysicalEntity property,
641         * into equivalent physical entities
642         * with generic entity references (which have members);
643         * this is a better and less error prone way to model
644         * generic molecules in BioPAX L3. 
645         * 
646         * Notes:
647         * Generic Complexes could be normalized in a similar way,
648         * but they do not have entityReference property and might
649         * contain generic (incl. not yet normalized) components, which
650         * makes it complicated.
651         * 
652         * Please avoid using 'memberPhysicalEntity' in your BioPAX L3 models
653         * unless absolutely sure/required, for there is an alternative way 
654         * (using PhysicalEntity/entityReference/memberEntityReference), and 
655         * this will probably be deprecated in the future BioPAX releases.
656         * 
657         * @param model biopax model to fix
658         */
659        public static void normalizeGenerics(Model model)
660        {
661
662                HashMap<Set<EntityReference>, EntityReference> memberMap = new HashMap<Set<EntityReference>,
663                                EntityReference>();
664                Set<SimplePhysicalEntity> pes = model.getObjects(SimplePhysicalEntity.class);
665                Set<SimplePhysicalEntity> pesToBeNormalized = new HashSet<SimplePhysicalEntity>();
666                
667                for (SimplePhysicalEntity pe : pes)
668                {
669                        if (pe.getEntityReference() == null)
670                        {
671                                if (!pe.getMemberPhysicalEntity().isEmpty())
672                                {
673                                        pesToBeNormalized.add(pe);
674                                }
675                        }
676                }
677                
678                for (SimplePhysicalEntity pe : pesToBeNormalized)
679                {
680                        try
681                        {
682                                createNewERandAddMembers(model, pe, memberMap);
683                        }
684                        catch (Exception e)
685                        {
686                                e.printStackTrace();
687                        }
688
689
690                }
691        }
692
693        
694        private static void createNewERandAddMembers(Model model, SimplePhysicalEntity pe,
695                        HashMap<Set<EntityReference>, EntityReference> memberMap)
696        {
697                SimplePhysicalEntity first = (SimplePhysicalEntity) pe.getMemberPhysicalEntity().iterator().next();
698                Set<EntityReference> members = pe.getGenericEntityReferences();
699                EntityReference er = memberMap.get(members);
700                if (er == null)
701                {
702                        EntityReference firstEntityReference = first.getEntityReference();
703                        if (firstEntityReference != null)
704                        {
705                                //generate a new URI in the same namespace (xml:base)
706                                String syntheticId = model.getXmlBase() + md5hex(pe.getRDFId()); 
707                                // create and add a new EntityReference
708                                er = (EntityReference) model.addNew(firstEntityReference.getModelInterface(), syntheticId);
709                                // copy names and xrefs (making orig. unif.xrefs become relat.xrefs)
710                                copySimplePointers(model, pe, er);
711                                
712                                er.addComment("auto-generated by Paxtools from generic " 
713                                                + pe.getModelInterface().getSimpleName()
714                                                + ", uri=" + pe.getRDFId() + "");
715
716                                for (EntityReference member : members)
717                                {
718                                        er.addMemberEntityReference(member);
719                                }
720                                
721                                memberMap.put(members, er);
722                        }
723                }
724                pe.setEntityReference(er);
725        }
726
727        
728        /**
729         * Copies names and xrefs from source to target 
730         * biopax object; it does not copy unification xrefs 
731         * but instead adds relationship xrefs using the same 
732         * db and id values as source's unification xrefs.
733         * 
734         * @param model the biopax model where the source and target objects belong
735         * @param source from
736         * @param target to
737         */
738        public static void copySimplePointers(Model model, Named source, Named target)
739        {
740                target.setDisplayName(source.getDisplayName());
741                target.setStandardName(source.getStandardName());
742                for (String name : source.getName())
743                {
744                        target.addName(name);
745                }
746                for (Xref xref : source.getXref())
747                {
748                        if ((xref instanceof UnificationXref))
749                        {
750                                // generate URI using model's xml:base and xref's properties
751                                String id = model.getXmlBase() + md5hex(xref.getDb()+xref.getRDFId());
752                                Xref byID = (Xref) model.getByID(id);
753                                if (byID == null)
754                                {
755                                        RelationshipXref rref = model.addNew(RelationshipXref.class, id);
756                                        rref.setDb(xref.getDb());
757                                        rref.setId(xref.getId());
758                                        rref.setDbVersion(xref.getDbVersion());
759                                        rref.setIdVersion(xref.getDbVersion());
760                                        xref = rref;
761                                } else
762                                {
763                                        xref = byID;
764                                }
765                        }
766                        
767                        target.addXref(xref);
768                }
769        }
770
771        
772        /**
773         * TODO annotate or deprecate...
774         * 
775         * @param model biopax model to edit
776         */
777        public static void resolveFeatures(Model model)
778        {
779                if (!model.getLevel().equals(BioPAXLevel.L3))
780                {
781                        throw new UnsupportedOperationException(
782                                "resolveFeatures method does not work with " + model.getLevel());
783                } else
784                {
785                        resolveBindingFeatures(model);
786
787                        //For each entity reference:
788                        for (EntityReference er : model.getObjects(EntityReference.class))
789                        {
790                                for (SimplePhysicalEntity spe : er.getEntityReferenceOf())
791                                {
792                                        for (Interaction interaction : spe.getParticipantOf())
793                                        {
794                                                //we will do this left to right
795                                                if (interaction instanceof Conversion)
796                                                {
797                                                        Conversion cnv = (Conversion) (interaction);
798                                                        if (cnv.getLeft().contains(spe))
799                                                        {
800                                                                for (PhysicalEntity physicalEntity : cnv.getRight())
801                                                                {
802                                                                        if (physicalEntity instanceof SimplePhysicalEntity)
803                                                                        {
804                                                                                SimplePhysicalEntity otherSPE = (SimplePhysicalEntity) (physicalEntity);
805                                                                                if (otherSPE.getEntityReference().equals(spe.getEntityReference()))
806                                                                                {
807                                                                                        Set<EntityFeature> added =
808                                                                                                        findFeaturesAddedToSecond(physicalEntity, otherSPE, true);
809                                                                                        Set<EntityFeature> removed =
810                                                                                                        findFeaturesAddedToSecond(otherSPE, physicalEntity, true);
811                                                                                }
812                                                                        }
813                                                                }
814                                                                //TODO HANDLE complexes?
815                                                        }
816                                                }
817                                        }
818                                }
819                        }
820                }
821        }
822
823
824        private static void resolveBindingFeatures(Model model)
825        {
826                ShallowCopy copier = new ShallowCopy(BioPAXLevel.L3);
827
828                //For each Complex
829                Set<Complex> complexes = model.getObjects(Complex.class);
830                for (Complex complex : complexes) {
831                        resolveBindingFeatures(model, complex, copier);
832                }
833        }
834
835        
836        private static void resolveBindingFeatures(Model model, Complex complex, ShallowCopy copier)
837        {
838                Set<PhysicalEntity> components = complex.getComponent();
839                for (PhysicalEntity component : components)
840                {
841                        resolveFeaturesOfComponent(model, complex, component, copier);
842                }
843        }
844
845        private static void resolveFeaturesOfComponent(Model model, Complex complex, PhysicalEntity component,
846                        ShallowCopy copier)
847        {
848                boolean connected = false;
849                Set<EntityFeature> feature = component.getFeature();
850                for (EntityFeature ef : feature)
851                {
852                        if (ef instanceof BindingFeature)
853                        {
854                                BindingFeature bindsTo = ((BindingFeature) ef).getBindsTo();
855                                Set<PhysicalEntity> featureOf = bindsTo.getFeatureOf();
856                                if (!SetEquivalenceChecker.hasEquivalentIntersection(complex.getComponent(), featureOf))
857                                {
858                                        System.err.println(
859                                                        "The Complex" + complex.getName() + "(" + complex.getRDFId() + ") has  component" +
860                                                        component.getDisplayName() + "(" + component.getRDFId() + ") which has" +
861                                                        "a binding feature (" + ef.getRDFId() + "), but none of the bound " +
862                                                        "participants are in this complex");
863                                        //TODO This is an error - fail.
864                                        return;
865                                } else
866                                {
867                                        connected = true;
868                                }
869                        }
870                }
871                if (!connected)
872                {
873                        Set<Interaction> participantOf = component.getParticipantOf();
874                        for (Interaction interaction : participantOf)
875                        {
876                                //It is ok for complex members to control a participant
877                                if (!(interaction instanceof Control))
878                                {
879                                        component = createCopy(model, complex, component, copier);
880                                        break;
881                                }
882                        }
883
884                        BindingFeature bf = model.addNew(BindingFeature.class,
885                                                         component.getRDFId() + "bond" + "in_Complex_" + complex.getRDFId());
886                        component.addFeature(bf);
887                        if (component instanceof SimplePhysicalEntity)
888                        {
889                                ((SimplePhysicalEntity) component).getEntityReference().addEntityFeature(bf);
890                        }
891                }
892        }
893
894        private static PhysicalEntity createCopy(Model model, Complex complex, PhysicalEntity component, ShallowCopy copier)
895        {
896                //This is an aggressive fix - if a complex member is present in both an interaction that is not a control
897                // and a complex, we are creating clone, adding it a binding feature to mark it  and put it  into the
898                // complex and remove the old one.
899                complex.removeComponent(component);
900                component = copier.copy(model, component, component.getRDFId() + "in_Complex_" + complex.getRDFId());
901                complex.addComponent(component);
902                return component;
903        }
904
905
906        /**
907         * This method iterates over the features in a model and tries to find equivalent objects and merges them.
908         * @param model to be fixed
909         */
910        public static void replaceEquivalentFeatures(Model model)
911        {
912
913                EquivalenceGrouper<EntityFeature> equivalents = new EquivalenceGrouper<EntityFeature>();
914                HashMap<EntityFeature, EntityFeature> mapped = new HashMap<EntityFeature, EntityFeature>();
915                HashSet<EntityFeature> scheduled = new HashSet<EntityFeature>();
916
917                for (EntityFeature ef : model.getObjects(EntityFeature.class))
918                {
919                        if (ef.getEntityFeatureOf() == null)
920                        {
921                                inferEntityFromPE(ef, ef.getFeatureOf());
922                                if (ef.getEntityFeatureOf() == null) inferEntityFromPE(ef, ef.getNotFeatureOf());
923                        }
924                        equivalents.add(ef);
925                }
926                for (List<EntityFeature> bucket : equivalents.getBuckets())
927                {
928                        for (int i = 1; i < bucket.size(); i++)
929                        {
930                                EntityFeature ef = bucket.get(i);
931                                if (LOG.isWarnEnabled())
932                                {
933                                        LOG.warn("removing: "+ ef.getRDFId()+ " since it is equivalent to: "+ bucket.get(0));
934                                }
935                                scheduled.add(ef);
936                        }
937                }
938                for (EntityFeature entityFeature : scheduled)
939                {
940                        model.remove(entityFeature);
941                }
942                for (PhysicalEntity physicalEntity : model.getObjects(PhysicalEntity.class))
943                {
944                        Set<EntityFeature> features = new HashSet<EntityFeature>(physicalEntity.getFeature());
945                        for (EntityFeature feature : features)
946                        {
947                                EntityFeature that = mapped.get(feature);
948                                if (that != null && !that.equals(feature))
949                                {
950                                        LOG.debug(" replacing " + feature +
951                                                                          "{" + feature.getRDFId() + "} with " +
952                                                                          that + "{" + that.getRDFId() + "}");
953                                        physicalEntity.removeFeature(feature);
954                                        physicalEntity.addFeature(that);
955                                }
956                        }
957                }
958        }
959
960
961        private static void inferEntityFromPE(EntityFeature ef, Set<PhysicalEntity> pes)
962        {
963
964                for (PhysicalEntity physicalEntity : pes)
965                {
966                        if (physicalEntity instanceof SimplePhysicalEntity)
967                        {
968                                EntityReference er = ((SimplePhysicalEntity) physicalEntity).getEntityReference();
969                                if (er != null)
970                                {
971                                        er.addEntityFeature(ef);
972                                        LOG.debug("Inferred the ER of " + ef.getRDFId() + " as " + er.getRDFId());
973                                        return;
974                                }
975                        }
976                }
977        }
978
979
980        /**
981         * Collects data type (not object) property
982         * values (can be then used for full-text indexing).
983         * 
984         * @param biopaxElement biopax object
985         * @param depth greater or equals 0: 0 means use this object's
986         *        data properties only; 1 - add child's data properties, etc.;
987         *        (the meaning is slightly different from that of Fetcher.fetch(..) method)
988         * @param dataPropertyFilters - biopax data property filters to optionally
989         *                        either skip e.g. properties 'sequence', 'temperature',
990         *                        or only accept 'term', 'comment', 'name', etc.
991         * @return set of keywords
992         */
993        public static Set<String> getKeywords(BioPAXElement biopaxElement, int depth,
994                                                                                  Filter<DataPropertyEditor>... dataPropertyFilters)
995        {
996                LOG.debug("getKeywords called: " + biopaxElement.getRDFId());
997                
998                EditorMap em = SimpleEditorMap.L3;
999                Set<String> ss = new HashSet<String>();
1000
1001                //if depth>0, fetch child biopax objects (ignoring PathwayStep.nextStep property)
1002                Set<BioPAXElement> elms = (depth > 0)
1003                        ? new Fetcher(em, Fetcher.nextStepFilter).fetch(biopaxElement, depth)
1004                                : new HashSet<BioPAXElement>();
1005
1006                //add this one
1007                elms.add(biopaxElement);
1008                
1009                for (BioPAXElement bpe : elms) {
1010                        Set<PropertyEditor> props = em.getEditorsOf(bpe);
1011                        for (PropertyEditor pe : props) {
1012                                //skip for object prop. or one that fails to pass a filter
1013                                if (pe instanceof ObjectPropertyEditor
1014                                                || !filter((DataPropertyEditor)pe, dataPropertyFilters))
1015                                        continue;
1016
1017                                Set values = pe.getValueFromBean(bpe);
1018                                for (Object v : values) {
1019                                        if (!pe.isUnknown(v)) {
1020                                                ss.add(v.toString());
1021                                        }
1022                                }
1023                        }
1024                }
1025                
1026                return ss;
1027        }
1028
1029
1030        private static <T extends PropertyEditor> boolean filter(T pe, Filter<T>... propertyFilters) {
1031                if(propertyFilters.length==0)
1032                        return true;
1033
1034                for(Filter<T> pf : propertyFilters) {
1035                        if (!pf.filter(pe)) {
1036                                return false;
1037                        }
1038                }
1039
1040                return true;
1041        }
1042
1043
1044        /**
1045         * Collects BioSource objects from this or
1046         * related elements (where it makes sense;
1047         * though the biopax element might have no 
1048         * or empty 'organism' property at all.
1049         * 
1050         * The idea is to additionally associate with 
1051         * existing BioSource objects, and thus make 
1052         * filtering by organism possible, for at least 
1053         * Interaction, Protein, Complex, Dna, etc. 
1054         * biopax entities.
1055         * 
1056         * 
1057         * @param biopaxElement biopax object
1058         * @return organism names
1059         */
1060        public static Set<BioSource> getOrganisms(BioPAXElement biopaxElement) {                
1061                final Set<BioSource> biosources = new HashSet<BioSource>();
1062                //shortcut
1063                if(biopaxElement == null)
1064                        return biosources;
1065                
1066                LOG.debug("getOrganisms called: " + biopaxElement.getRDFId());  
1067                                
1068                if(biopaxElement instanceof BioSource) {
1069                        biosources.add((BioSource) biopaxElement);                      
1070                } else if (biopaxElement instanceof Pathway) {                  
1071                        if(((Pathway)biopaxElement).getOrganism() != null)
1072                                biosources.add(((Pathway)biopaxElement).getOrganism());
1073//                      else 
1074//                              //if not set, - infer from children (expensive)
1075//                              biosources.addAll((new Fetcher(em, Fetcher.nextStepFilter))
1076//                                      .fetch(biopaxElement, BioSource.class));
1077                        
1078                } else if (biopaxElement instanceof Gene) {     
1079                        if(((Gene)biopaxElement).getOrganism() != null)
1080                                biosources.add(((Gene) biopaxElement).getOrganism());
1081                } else if (biopaxElement instanceof PathwayStep) {
1082                        Pathway pw = ((PathwayStep) biopaxElement).getPathwayOrderOf();
1083                        if(pw != null && pw.getOrganism() != null)
1084                                biosources.add(pw.getOrganism());
1085                } else if (biopaxElement instanceof Interaction
1086                                || biopaxElement instanceof EntityReference
1087                                || biopaxElement instanceof PhysicalEntity) {
1088                        
1089                        if (biopaxElement instanceof SequenceEntityReference) {
1090                                if(((SequenceEntityReference) biopaxElement).getOrganism() != null)
1091                                        biosources.add(((SequenceEntityReference) biopaxElement).getOrganism());
1092                        }
1093                        
1094                        //get from children (members, participants, components, etc.)
1095                        biosources.addAll((new Fetcher(em, Fetcher.nextStepFilter))
1096                                .fetch(biopaxElement, BioSource.class));                        
1097                } 
1098                
1099                return biosources;
1100        }
1101
1102
1103        /**
1104         * Collects all Provenance objects 
1105         * associated with this one as follows:
1106         * - if the element is Entity (has 'dataSource' property) 
1107         *   or is Provenence itself, get the values and quit;
1108         * - if the biopax element is PathwayStep or EntityReference, 
1109         *   traverse into some of its object/inverse properties to collect 
1110         *   dataSource values from associated entities.
1111         * - return empty set for all other BioPAX types (it is less important 
1112         *   to associate common self-descriptive biopax utility classes with 
1113         *   particular pathway data sources)
1114         * 
1115         * @param biopaxElement a biopax object
1116         * @return Provenance objects set
1117         */
1118        public static Set<Provenance> getDatasources(BioPAXElement biopaxElement) {
1119                
1120                final Set<Provenance> datasources = new HashSet<Provenance>();
1121                
1122                //shortcut
1123                if(biopaxElement == null)
1124                        return datasources;
1125
1126                LOG.debug("getDatasources called: " + biopaxElement.getRDFId());
1127                
1128                if (biopaxElement instanceof Provenance) {                      
1129                        datasources.add((Provenance) biopaxElement);                    
1130                } else if (biopaxElement instanceof Entity) {                   
1131                        datasources.addAll(((Entity) biopaxElement).getDataSource());                   
1132                } else if (biopaxElement instanceof EntityReference) {
1133                        // Let ERs inherit its dataSource from parent PEs or ERs:                       
1134                        for(SimplePhysicalEntity spe : ((EntityReference) biopaxElement).getEntityReferenceOf())
1135                                datasources.addAll(getDatasources(spe));                        
1136                        for(EntityReference er : ((EntityReference) biopaxElement).getMemberEntityReferenceOf())
1137                                datasources.addAll(getDatasources(er));                 
1138                } else if (biopaxElement instanceof PathwayStep) {                      
1139                        datasources.addAll(getDatasources(((PathwayStep) biopaxElement).getPathwayOrderOf()));                          
1140                } else {
1141                        // ignore
1142                }
1143                                
1144                return datasources;
1145        }
1146
1147        
1148        /**
1149         * Collects all parent Pathway objects recursively
1150         * traversing the inverse object properties of the
1151         * biopax element. It ignores all BioPAX types except (incl. sub-classes of):
1152         * Pathway, Interaction, PathwayStep, PhysicalEntity, EntityReference, and Gene.
1153         * 
1154         * @param biopaxElement biopax object
1155         * @return inferred parent pathways
1156         */
1157        public static Set<Pathway> getParentPathways(BioPAXElement biopaxElement) {
1158                final Set<BioPAXElement> visited = new HashSet<BioPAXElement>();
1159                return getParentPathwaysRecursively(biopaxElement, visited);
1160        }
1161
1162        // recursively finds all the parent pathways of the object, while escaping infinite loops
1163        private static Set<Pathway> getParentPathwaysRecursively(
1164                        final BioPAXElement biopaxElement, final Set<BioPAXElement> visited) {
1165
1166                final Set<Pathway> pathways = new HashSet<Pathway>();
1167                
1168                //shortcut, when bpe is null or already processed
1169                if(biopaxElement == null || !visited.add(biopaxElement)) {
1170                        LOG.info("Ignored null or previously visited object:" + biopaxElement);
1171                        return pathways;
1172                }
1173                
1174                LOG.debug("getParentPathways called: " + biopaxElement.getRDFId());
1175
1176                if(biopaxElement instanceof Process) {
1177                        if(biopaxElement instanceof Pathway) // add itself
1178                                pathways.add((Pathway) biopaxElement);
1179                        // continue looking up to parent pathways (until all top ones reached)
1180                        for(Pathway pw : ((Process)biopaxElement).getPathwayComponentOf())
1181                                pathways.addAll(getParentPathwaysRecursively(pw, visited)); //TODO bug: in PC2v8 (thanks to kegg), inf. loop here (StackOverFlow)
1182                        for(Interaction it : ((Process)biopaxElement).getParticipantOf())
1183                                pathways.addAll(getParentPathwaysRecursively(it, visited));
1184                        for(PathwayStep pt : ((Process)biopaxElement).getStepProcessOf())
1185                                pathways.addAll(getParentPathwaysRecursively(pt, visited));
1186                } else if(biopaxElement instanceof PathwayStep) {
1187                        pathways.addAll(getParentPathwaysRecursively(((PathwayStep) biopaxElement).getPathwayOrderOf(), visited));
1188                } else if(biopaxElement instanceof PhysicalEntity ) {
1189                        for(PhysicalEntity pe : ((PhysicalEntity)biopaxElement).getMemberPhysicalEntityOf())
1190                                pathways.addAll(getParentPathwaysRecursively(pe, visited));
1191                        for(Interaction it : ((Entity)biopaxElement).getParticipantOf())
1192                                pathways.addAll(getParentPathwaysRecursively(it, visited));
1193                        for(Complex c : ((PhysicalEntity)biopaxElement).getComponentOf())
1194                                pathways.addAll(getParentPathwaysRecursively(c, visited));
1195                } else if(biopaxElement instanceof EntityReference) {
1196                        for(EntityReference er : ((EntityReference) biopaxElement).getMemberEntityReferenceOf())
1197                                pathways.addAll(getParentPathwaysRecursively(er, visited));
1198                        for(SimplePhysicalEntity spe : ((EntityReference) biopaxElement).getEntityReferenceOf())
1199                                pathways.addAll(getParentPathwaysRecursively(spe, visited));
1200                } else if (biopaxElement instanceof Gene ) { 
1201                        for(Interaction it : ((Entity) biopaxElement).getParticipantOf())
1202                                pathways.addAll(getParentPathwaysRecursively(it, visited));
1203                } else {
1204                        // ignore
1205                }
1206                
1207                return pathways;
1208        }
1209
1210
1211        /**
1212         * Given BioPAX model, for each BioPAX object of the listed classes and their sub-classes,
1213         * such as e.g. Entity and EntityReference (if no types are provided - for all objects in the model),
1214         * creates (parent) an annotation map entry with key: "pathway", value: a set of URIs of parent pathways.
1215         *
1216         * @param model BioPAX model
1217         * @param directParentsOnly use only direct or nearest parent (sub-)pathways of a BioPAX object in the annotation
1218         * @param types optional list of BioPAX types to annotate); default: BioPAXElement.class (i.e., everything)
1219         */
1220        public static void addPathwayAnnotations(
1221                        Model model, boolean directParentsOnly, Class<? extends BioPAXElement>... types) {
1222                //TODO implement addPathwayAnnotations
1223        }
1224
1225
1226        /**
1227         * Merges equivalent interactions.
1228         * 
1229         * Note (warning): please check if the result is desirable; 
1230         * the result of the merging very much depends on actual pathway data quality...
1231         * 
1232         * @param model to edit/update
1233         */
1234        public static void mergeEquivalentInteractions(Model model)
1235        {
1236                EquivalenceGrouper<Conversion> groups = new EquivalenceGrouper(model.getObjects(Conversion.class));
1237
1238                for (List<Conversion> group : groups.getBuckets())
1239                {
1240                        if (group.size() > 1)
1241                        {
1242                                HashSet<Conversion> tobeRemoved = new HashSet<Conversion>();
1243                                Interaction primus = null;
1244                                for (Conversion conversion : group)
1245                                {
1246                                        if (primus == null)
1247                                        {
1248                                                primus = conversion;
1249                                        } else
1250                                        {
1251                                                copySimplePointers(model, conversion, primus);
1252                                                Set<Control> controlledOf = conversion.getControlledOf();
1253                                                for (Control control : controlledOf)
1254                                                {
1255                                                        if (!control.getControlled().contains(primus))
1256                                                        {
1257                                                                control.addControlled(primus);
1258                                                        }
1259                                                }
1260                                                Set<Pathway> owners = conversion.getPathwayComponentOf();
1261                                                for (Pathway pathway : owners)
1262                                                {
1263                                                        if(!pathway.getPathwayComponent().contains(primus))
1264                                                        {
1265                                                                pathway.addPathwayComponent(primus);
1266                                                        }
1267
1268                                                }
1269                                                tobeRemoved.add(conversion);
1270                                        }
1271                                }
1272                                for (Conversion conversion : tobeRemoved)
1273                                {
1274                                        cleanAllInverse(conversion);
1275                                        model.remove(conversion);
1276                                }
1277                        }
1278                }
1279        }
1280
1281        private static void cleanAllInverse(Conversion conversion)
1282        {
1283                Set<PhysicalEntity> concSafe = new HashSet<PhysicalEntity>(conversion.getLeft());
1284                for (PhysicalEntity pe : concSafe)
1285                {
1286                        conversion.removeLeft(pe);
1287                }
1288                concSafe = new HashSet<PhysicalEntity>(conversion.getRight());
1289                for (PhysicalEntity pe : concSafe)
1290                {
1291                        conversion.removeRight(pe);
1292                }
1293                Set<Control> controlledOf = new HashSet<Control>(conversion.getControlledOf());
1294                for (Control control : controlledOf)
1295                {
1296                        control.removeControlled(conversion);
1297                }
1298                Set<Pathway> owners = new HashSet<Pathway>(conversion.getPathwayComponentOf());
1299                for (Pathway pathway : owners)
1300                {
1301                        pathway.removePathwayComponent(conversion);
1302                }
1303        }
1304
1305}
1306
1307
1308