001package org.biopax.paxtools.pattern.miner;
002
003import org.biopax.paxtools.controller.PathAccessor;
004import org.biopax.paxtools.model.BioPAXElement;
005import org.biopax.paxtools.model.level3.*;
006import org.biopax.paxtools.pattern.Match;
007import org.biopax.paxtools.pattern.Pattern;
008import org.biopax.paxtools.pattern.constraint.HasAnID;
009import org.biopax.paxtools.pattern.util.Blacklist;
010import org.biopax.paxtools.pattern.util.PhysicalEntityChain;
011import org.biopax.paxtools.pattern.util.HGNC;
012
013import java.io.IOException;
014import java.io.OutputStream;
015import java.io.OutputStreamWriter;
016import java.util.*;
017
018/**
019 * Adapter class for a miner.
020 *
021 * @author Ozgun Babur
022 */
023public abstract class MinerAdapter implements Miner
024{
025        /**
026         * Name of the miner.
027         */
028        protected String name;
029
030        /**
031         * Description of the miner.
032         */
033        protected String description;
034
035        /**
036         * Pattern to use for mining.
037         */
038        protected Pattern pattern;
039
040        /**
041         * Blacklist for identifying ubiquitous small molecules.
042         */
043        protected Blacklist blacklist;
044
045        /**
046         * ID fetcher is used for skipping objects that cannot generate a valid ID during the search.
047         */
048        protected IDFetcher idFetcher;
049
050        /**
         * Cache of object IDs, kept for performance: without it, about half of the SIF conversion
         * time is spent in fetchIDs().
053         */
054        protected Map<BioPAXElement, Set<String>> idMap;
055
056        /**
057         * Constructor with name and description.
058         * @param name name of the miner
059         * @param description description of the miner
060         */
061        protected MinerAdapter(String name, String description)
062        {
063                this.name = name;
064                this.description = description;
065                this.idMap = new HashMap<BioPAXElement, Set<String>>();
066        }
067
068        /**
069         * Sets the blacklist to use during SIF search.
070         * @param blacklist for identifying ubiquitous small molecules
071         */
072        public void setBlacklist(Blacklist blacklist)
073        {
074                this.blacklist = blacklist;
075        }
076
077
078        /**
079         * Sets the ID fetcher to use during SIF search.
080         * @param idFetcher ID generator from BioPAX object
081         */
082        public void setIDFetcher(IDFetcher idFetcher)
083        {
084                this.idFetcher = idFetcher;
085        }
086
087        /**
088         * Constructs the pattern to use for mining.
089         * @return the pattern
090         */
091        public abstract Pattern constructPattern();
092
093        /**
094         * Gets the pattern, constructs if null.
095         * @return pattern
096         */
097        public Pattern getPattern()
098        {
099                if (pattern == null)
100                {
101                        pattern = constructPattern();
102
103                        if (this instanceof SIFMiner && idFetcher != null && idMap != null)
104                        {
105                                pattern.add(new HasAnID(idFetcher, idMap), ((SIFMiner) this).getSourceLabel());
106                                pattern.add(new HasAnID(idFetcher, idMap), ((SIFMiner) this).getTargetLabel());
107                        }
108
109                        pattern.optimizeConstraintOrder();
110                }
111
112                return pattern;
113        }
114
115        /**
116         * Gets the name of the miner.
117         * @return name
118         */
119        public String getName()
120        {
121                return name;
122        }
123
124        /**
125         * Gets the description of the miner.
126         * @return description
127         */
128        public String getDescription()
129        {
130                return description;
131        }
132
133        public void setName(String name)
134        {
135                this.name = name;
136        }
137
138        public void setDescription(String description)
139        {
140                this.description = description;
141        }
142
143        public Map<BioPAXElement, Set<String>> getIdMap()
144        {
145                return idMap;
146        }
147
148        public void setIdMap(Map<BioPAXElement, Set<String>> idMap)
149        {
150                this.idMap = idMap;
151        }
152
153        /**
154         * Uses the name as sting representation of the miner.
155         * @return name
156         */
157        @Override
158        public String toString()
159        {
160                return getName();
161        }
162
163        //----- Section: Helper methods ---------------------------------------------------------------|
164
165        /**
166         * Searches for the gene symbol of the given EntityReference.
167         * @param pr to search for a symbol
168         * @return symbol
169         */
170        protected String getGeneSymbol(ProteinReference pr)
171        {
172                for (Xref xr : pr.getXref())
173                {
174                        String db = xr.getDb();
175                        if (db != null)
176                        {
177                                db = db.toLowerCase();
178                                if (db.startsWith("hgnc"))
179                                {
180                                        String id = xr.getId();
181                                        if (id != null)
182                                        {
183                                                String symbol = HGNC.getSymbol(id);
184                                                if (symbol != null && !symbol.isEmpty())
185                                                {
186                                                        return symbol;
187                                                }
188                                        }
189                                }
190                        }
191                }
192
193                return null;
194        }
195
196        /**
197         * Searches for the uniprot name of the given human EntityReference.
198         * @param pr to search for the uniprot name
199         * @return uniprot name
200         */
201        protected String getUniprotNameForHuman(ProteinReference pr)
202        {
203                for (String name : pr.getName())
204                {
205                        if (name.endsWith("_HUMAN")) return name;
206                }
207                return null;
208        }
209
210        /**
211         * Searches for the gene symbol of the given EntityReference.
212         * @param m current match
213         * @param label label of the related EntityReference in the pattern
214         * @return symbol
215         */
216        protected String getGeneSymbol(Match m, String label)
217        {
218                ProteinReference pr = (ProteinReference) m.get(label, getPattern());
219                return getGeneSymbol(pr);
220        }
221
222        /**
223         * Searches for the uniprot name of the given human EntityReference.
224         * @param m current match
225         * @param label label of the related EntityReference in the pattern
226         * @return uniprot name
227         */
228        protected String getUniprotNameForHuman(Match m, String label)
229        {
230                ProteinReference er = (ProteinReference) m.get(label, getPattern());
231                return getUniprotNameForHuman(er);
232        }
233
234        /**
235         * Checks if the type of a Control is inhibition.
236         * @param ctrl Control to check
237         * @return true if type is inhibition related
238         */
239        public boolean isInhibition(Control ctrl)
240        {
241                return ctrl.getControlType() != null && ctrl.getControlType().toString().startsWith("I");
242        }
243
244        //----- Section: Mining modifications ---------------------------------------------------------|
245
246        /**
247         * Accessor for modification features.
248         */
249        private static final PathAccessor FEAT_ACC =
250                new PathAccessor("PhysicalEntity/feature:ModificationFeature");
251
252        /**
253         * Accessor for modification terms.
254         */
255        private static final PathAccessor TERM_ACC =
256                new PathAccessor("ModificationFeature/modificationType/term");
257
258        /**
259         * Accessor to sequence site of modification.
260         */
261        private static final PathAccessor SITE_ACC =
262                new PathAccessor("ModificationFeature/featureLocation:SequenceSite/sequencePosition");
263
264        /**
265         * Accessor to sequence interval begin site of modification.
266         */
267        private static final PathAccessor INTERVAL_BEGIN_ACC = new PathAccessor(
268                "ModificationFeature/featureLocation:SequenceInterval/sequenceIntervalBegin/sequencePosition");
269
270        /**
271         * Accessor to sequence interval end site of modification.
272         */
273        private static final PathAccessor INTERVAL_END_ACC = new PathAccessor(
274                "ModificationFeature/featureLocation:SequenceInterval/sequenceIntervalEnd/sequencePosition");
275
276        /**
277         * Sorts the modifications and gets them in a String.
278         * @param set modifications
279         * @return a String listing the modifications
280         */
281        public Set<String> toStringSet(Set<ModificationFeature> set)
282        {
283                List<ModificationFeature> list = new ArrayList<ModificationFeature>(set);
284
285                Collections.sort(list, new Comparator<ModificationFeature>()
286                {
287                        @Override
288                        public int compare(ModificationFeature o1, ModificationFeature o2)
289                        {
290                                String t1 = getModificationTerm(o1);
291                                String t2 = getModificationTerm(o2);
292
293                                Integer l1 = getPositionStart(o1);
294                                Integer l2 = getPositionStart(o2);
295
296                                if (t1 == null && t2 == null) return l1.compareTo(l2);
297                                if (t1 == null) return 1;
298                                if (t2 == null) return -1;
299                                if (t1.equals(t2)) return l1.compareTo(l2);
300                                return t1.compareTo(t2);
301                        }
302                });
303
304                return getInString(list);
305        }
306
307        /**
308         * Gets the modifications is a string that is separated with comma.
309         * @param list modification list
310         * @return String representing the modifications
311         */
312        private Set<String> getInString(List<ModificationFeature> list)
313        {
314                Set<String> text = new HashSet<String>(list.size());
315
316                for (ModificationFeature mf : list)
317                {
318                        String term = getModificationTerm(mf);
319                        String loc = getPositionInString(mf);
320
321                        if (term != null)
322                        {
323                                String s = term + loc;
324                                if (!text.contains(s)) text.add(s);
325                        }
326                }
327                return text;
328        }
329
330        /**
331         * Gets the String term of the modification feature.
332         * @param mf modification feature
333         * @return modification term
334         */
335        public String getModificationTerm(ModificationFeature mf)
336        {
337                Set vals = TERM_ACC.getValueFromBean(mf);
338                if (vals.isEmpty()) return null;
339                return vals.iterator().next().toString();
340        }
341
342        /**
343         * Gets the first position of the modification feature.
344         * @param mf modification feature
345         * @return first location
346         */
347        public int getPositionStart(ModificationFeature mf)
348        {
349                Set vals = SITE_ACC.getValueFromBean(mf);
350
351                if (!vals.isEmpty())
352                {
353                        return ((Integer) vals.iterator().next());
354                }
355
356                vals = INTERVAL_BEGIN_ACC.getValueFromBean(mf);
357
358                if (!vals.isEmpty())
359                {
360                        return ((Integer) vals.iterator().next());
361                }
362
363                return -1;
364        }
365
366        /**
367         * Gets the position of the modification feature as a String.
368         * @param mf modification feature
369         * @return location
370         */
371        public String getPositionInString(ModificationFeature mf)
372        {
373                Set vals = SITE_ACC.getValueFromBean(mf);
374
375                if (!vals.isEmpty())
376                {
377                        int x = ((Integer) vals.iterator().next());
378                        if (x > 0) return "@" + x;
379                }
380
381                vals = INTERVAL_BEGIN_ACC.getValueFromBean(mf);
382
383                if (!vals.isEmpty())
384                {
385                        int begin = ((Integer) vals.iterator().next());
386
387                        vals = INTERVAL_END_ACC.getValueFromBean(mf);
388
389                        if (!vals.isEmpty())
390                        {
391                                int end = ((Integer) vals.iterator().next());
392
393                                if (begin > 0 && end > 0 && begin <= end)
394                                {
395                                        if (begin == end) return "@"  + begin;
396                                        else return "@" + "[" + begin + "-" + end + "]";
397                                }
398                        }
399                }
400
401                return "";
402        }
403
404        /**
405         * Gets modifications of the given element in a string. The element has to be a PhysicalEntity.
406         * @param m match
407         * @param label label of the PhysicalEntity
408         * @return modifications
409         */
410        protected Set<String> getModifications(Match m, String label)
411        {
412                PhysicalEntity pe = (PhysicalEntity) m.get(label, getPattern());
413                return toStringSet(new HashSet<ModificationFeature>(FEAT_ACC.getValueFromBean(pe)));
414        }
415
416        /**
417         * Gets modifications of the given elements in a string set. The elements has to be a
418         * PhysicalEntity and they must be two ends of a chain with homology and/or complex membership
419         * relations.
420         * @param m match
421         * @param memLabel the member-end of the PhysicalEntity chain
422         * @param comLabel the complex-end of the PhysicalEntity chain
423         * @return modifications
424         */
425        protected Set<String> getModifications(Match m, String memLabel, String comLabel)
426        {
427                PhysicalEntityChain chain = getChain(m, memLabel, comLabel);
428                return toStringSet(chain.getModifications());
429        }
430
431        /**
432         * Gets cellular locations of the given elements in a string set. The elements has to be a
433         * PhysicalEntity and they must be two ends of a chain with homology and/or complex membership
434         * relations.
435         * @param m match
436         * @param memLabel the member-end of the PhysicalEntity chain
437         * @param comLabel the complex-end of the PhysicalEntity chain
438         * @return cellular locations
439         */
440        protected Set<String> getCellularLocations(Match m, String memLabel, String comLabel)
441        {
442                PhysicalEntityChain chain = getChain(m, memLabel, comLabel);
443                return chain.getCellularLocations();
444        }
445
446        /**
447         * Gets delta modifications of the given elements in string sets. The elements has to be two
448         * PhysicalEntity chains. The result array is composed of two strings: gained (0) and lost (1).
449         *
450         * @param m match
451         * @param memLabel1 the member-end of the first PhysicalEntity chain
452         * @param comLabel1 the complex-end of the first PhysicalEntity chain
453         * @param memLabel2 the member-end of the second PhysicalEntity chain
454         * @param comLabel2 the complex-end of the second PhysicalEntity chain
455         * @return delta modifications
456         */
457        protected Set<String>[] getDeltaModifications(Match m, String memLabel1, String comLabel1,
458                String memLabel2, String comLabel2)
459        {
460                PhysicalEntityChain chain1 = getChain(m, memLabel1, comLabel1);
461                PhysicalEntityChain chain2 = getChain(m, memLabel2, comLabel2);
462
463                Set<ModificationFeature> before = chain1.getModifications();
464                Set<ModificationFeature> after = chain2.getModifications();
465
466                Set<String> afterMods = toStringSet(after);
467                Set<String> beforeMods = toStringSet(before);
468                removeCommon(afterMods, beforeMods);
469
470                return new Set[]{afterMods, beforeMods};
471        }
472
473        /**
474         * Gets delta compartments of the given two PE chains. The result array is composed of two
475         * string sets: gained (0) and lost (1).
476         *
477         * @param m match
478         * @param memLabel1 the member-end of the first PhysicalEntity chain
479         * @param comLabel1 the complex-end of the first PhysicalEntity chain
480         * @param memLabel2 the member-end of the second PhysicalEntity chain
481         * @param comLabel2 the complex-end of the second PhysicalEntity chain
482         * @return delta compartments
483         */
484        protected Set<String>[] getDeltaCompartments(Match m, String memLabel1, String comLabel1,
485                String memLabel2, String comLabel2)
486        {
487                PhysicalEntityChain chain1 = getChain(m, memLabel1, comLabel1);
488                PhysicalEntityChain chain2 = getChain(m, memLabel2, comLabel2);
489
490                Set<String> before = chain1.getCellularLocations();
491                Set<String> after = chain2.getCellularLocations();
492                removeCommon(after, before);
493
494                return new Set[]{after, before};
495        }
496
497        protected PhysicalEntityChain getChain(Match m, String memLabel, String comLabel)
498        {
499                return new PhysicalEntityChain((PhysicalEntity) m.get(memLabel, getPattern()),
500                        (PhysicalEntity)m.get(comLabel, getPattern()));
501        }
502
503        protected void removeCommon(Set<String> set1, Set<String> set2)
504        {
505                Set<String> common = new HashSet<String>(set1);
506                common.retainAll(set2);
507                set1.removeAll(common);
508                set2.removeAll(common);
509        }
510
511        /**
512         * Converts the set of string to a single string.
513         * @param set the set
514         * @param sep separator string
515         * @return concatenated string
516         */
517        protected String concat(Set<String> set, String sep)
518        {
519                String s = "";
520
521                int i = set.size();
522                for (String ss : set)
523                {
524                        s += ss;
525                        if (--i > 0) s += sep;
526                }
527                return s;
528        }
529
530        /**
531         * Identifies negative and positive controls. Assumes positive by default.
532         * @param ctrl control to check
533         * @return sign
534         */
535        protected int sign(Control ctrl)
536        {
537                ControlType type = ctrl.getControlType();
538                if (type != null && type.name().startsWith("I")) return -1;
539                return 1;
540        }
541
542        /**
543         * Checks the cumulative sign of the chained controls.
544         * @param m result match
545         * @param ctrlLabel labels for controls
546         * @return sign
547         */
548        protected int sign(Match m, String... ctrlLabel)
549        {
550                int sign = 1;
551
552                for (String lab : ctrlLabel)
553                {
554                        Control ctrl = (Control) m.get(lab, getPattern());
555                        sign *= sign(ctrl);
556                }
557                return sign;
558        }
559
560        /**
561         * Checks if a PE chain is labeled as inactive.
562         * @param m the result match
563         * @param simpleLabel simple end of the chain
564         * @param complexLabel complex end of the chain
565         * @return true if labeled inactive
566         */
567        protected boolean labeledInactive(Match m, String simpleLabel, String complexLabel)
568        {
569                PhysicalEntityChain chain = getChain(m, simpleLabel, complexLabel);
570                PhysicalEntityChain.Activity activity = chain.checkActivityLabel();
571                return activity == PhysicalEntityChain.Activity.INACTIVE;
572        }
573
574        //----- Section: Result as SIF format ---------------------------------------------------------|
575
576        /**
577         * This method writes the output as pairs of gene symbols of the given two ProteinReference.
578         * Parameters labels have to map to ProteinReference.
579         * @param matches the search result
580         * @param out output stream for text output
581         * @param directed if true, reverse pairs is treated as different pairs
582         * @param label1 label for the first ProteinReference in the result matches
583         * @param label2 label for the second ProteinReference in the result matches
584         * @throws IOException if cannot write to output stream
585         */
586        public void writeResultAsSIF(Map<BioPAXElement, List<Match>> matches, OutputStream out,
587                boolean directed, String label1, String label2) throws IOException
588        {
589                if (matches.isEmpty()) return;
590                if (this instanceof SIFMiner)
591                {
592                        writeSIFsUsingSIFFramework(matches, out);
593                        return;
594                }
595
596                // Memory for already written pairs.
597                Set<String> mem = new HashSet<String>();
598
599                String mid = getRelationType() == null ? "\t" : "\trelation\t";
600
601                OutputStreamWriter writer = new OutputStreamWriter(out);
602                String header = getHeader();
603                writer.write(header  == null ? label1 + mid + label2 : header);
604
605                for (BioPAXElement ele : matches.keySet())
606                {
607                        for (Match m : matches.get(ele))
608                        {
609                                Set<String> s1 = getIdentifiers(m, label1);
610                                Set<String> s2 = getIdentifiers(m, label2);
611
612                                for (String s1s : s1)
613                                {
614                                        for (String s2s : s2)
615                                        {
616                                                String type = getRelationType();
617                                                String sep = type == null ? "\t" : "\t" + type + "\t";
618
619                                                String relation = s1s + sep + s2s;
620                                                String reverse = s2s + sep + s1s;
621
622                                                if (!mem.contains(relation) && (directed || !mem.contains(reverse)))
623                                                {
624                                                        writer.write("\n" + relation);
625                                                        mem.add(relation);
626                                                        if (!directed) mem.add(reverse);
627                                                }
628                                        }
629                                }
630                        }
631                }
632                writer.flush();
633        }
634
635        /**
636         * This method writes the output as pairs of gene symbols of the given two ProteinReference.
637         * Parameters labels have to map to ProteinReference.
638         * @param matches the search result
639         * @param out output stream for text output
640         * @throws IOException if cannot write to output stream
641         */
642        public void writeSIFsUsingSIFFramework(Map<BioPAXElement, List<Match>> matches,
643                OutputStream out) throws IOException
644        {
645                Map<SIFInteraction, SIFInteraction> sifMap = new HashMap<SIFInteraction, SIFInteraction>();
646
647                for (List<Match> matchList : matches.values())
648                {
649                        for (Match match : matchList)
650                        {
651                                for (SIFInteraction inter : this.createSIFInteraction(match, new CommonIDFetcher()))
652                                {
653                                        if (inter.hasIDs())
654                                        {
655                                                if (sifMap.containsKey(inter))
656                                                {
657                                                        sifMap.get(inter).mergeWith(inter);
658                                                }
659                                                else sifMap.put(inter, inter);
660                                        }
661                                }
662                        }
663                }
664                OutputStreamWriter writer = new OutputStreamWriter(out);
665
666                boolean first = true;
667                for (SIFInteraction inter : sifMap.keySet())
668                {
669                        if (first) first = false;
670                        else writer.write("\n");
671
672                        writer.write(inter.toString());
673                }
674                writer.flush();
675        }
676
677        /**
678         * Checks if the relation captured by match has a type. THis method just returns null but any
679         * child class using <code>writeResultAsSIF</code> method can implement this to have a
680         * relationship type between gene symbol pairs.
681         * @return type of the relation
682         */
683        public String getRelationType()
684        {
685                if (this instanceof SIFMiner)
686                {
687                        return ((SIFMiner) this).getSIFType().getTag();
688                }
689                else return null;
690        }
691
692        /**
693         * Gets the first line of the result file. This method should be overridden to customize the
694         * header of the result file.
695         * @return header
696         */
697        public String getHeader()
698        {
699                return null;
700        }
701
702        //----- Section: Result more detailed than SIF ------------------------------------------------|
703
704        /**
705         * Writes the result as a tab delimited format, where the column values are customized.
706         * @param matches result matches
707         * @param out output stream
708         * @param columns number of columns in the result
709         * @throws IOException if cannot write to the stream
710         */
711        public void writeResultDetailed(Map<BioPAXElement, List<Match>> matches, OutputStream out,
712                int columns) throws IOException
713        {
714                OutputStreamWriter writer = new OutputStreamWriter(out);
715
716                // write the header
717
718                String header = getHeader();
719                if (header != null)
720                {
721                        writer.write(header);
722                }
723                else
724                {
725                        for (int i = 0; i < columns; i++)
726                        {
727                                writer.write("col-" + (i+1));
728                                if (i < columns - 1) writer.write("\t");
729                        }
730                }
731
732                // memory for already written lines
733                Set<String> mem = new HashSet<String>();
734
735                // write values
736
737                for (BioPAXElement ele : matches.keySet())
738                {
739                        for (Match m : matches.get(ele))
740                        {
741                                String line = "";
742                                boolean aborted = false;
743
744                                for (int i = 0; i < columns; i++)
745                                {
746                                        String s = getValue(m, i);
747
748                                        if (s == null)
749                                        {
750                                                aborted = true;
751                                                break;
752                                        }
753                                        else
754                                        {
755                                                line += s + "\t";
756                                        }
757                                }
758
759                                if (aborted) continue;
760
761                                line = line.trim();
762
763                                if (!mem.contains(line))
764                                {
765                                        writer.write("\n" + line);
766                                        mem.add(line);
767                                }
768                        }
769                }
770                writer.flush();
771        }
772
773        /**
774         * This method has to be overridden if <code>writeResultDetailed</code> method is used. It
775         * creates the column value of the given Match. If this method returns <code>null</code> for any
776         * column, then the current match is ignored.
777         * @param m current match
778         * @param col current column
779         * @return column value
780         */
781        public String getValue(Match m, int col)
782        {
783                return null;
784        }
785
786        /**
787         * Creates a SIF interaction for the given match.
788         * @param m match to use for SIF creation
789         * @param fetcher ID generator from BioPAX object
790         * @return SIF interaction
791         */
792        public Set<SIFInteraction> createSIFInteraction(Match m, IDFetcher fetcher)
793        {
794                BioPAXElement sourceER = m.get(((SIFMiner) this).getSourceLabel(), getPattern());
795                BioPAXElement targetER = m.get(((SIFMiner) this).getTargetLabel(), getPattern());
796
797                Set<String> sources = fetchIDs(sourceER, fetcher);
798                Set<String> targets = fetchIDs(targetER, fetcher);
799
800                SIFType sifType = ((SIFMiner) this).getSIFType();
801
802                Set<SIFInteraction> set = new HashSet<SIFInteraction>();
803
804                for (String source : sources)
805                {
806                        for (String target : targets)
807                        {
808                                if (source.equals(target)) continue;
809                                else if (sifType.isDirected() || source.compareTo(target) < 0)
810                                {
811                                        set.add(new SIFInteraction(source, target, sourceER, targetER, sifType,
812                                                new HashSet<BioPAXElement>(m.get(getMediatorLabels(), getPattern())),
813                                                new HashSet<BioPAXElement>(m.get(getSourcePELabels(), getPattern())),
814                                                new HashSet<BioPAXElement>(m.get(getTargetPELabels(), getPattern()))));
815                                }
816                                else
817                                {
818                                        set.add(new SIFInteraction(target, source, targetER, sourceER, sifType,
819                                                new HashSet<BioPAXElement>(m.get(getMediatorLabels(), getPattern())),
820                                                new HashSet<BioPAXElement>(m.get(getTargetPELabels(), getPattern())),
821                                                new HashSet<BioPAXElement>(m.get(getSourcePELabels(), getPattern()))));
822                                }
823                        }
824                }
825                return set;
826        }
827
828        protected Set<String> fetchIDs(BioPAXElement ele, IDFetcher fetcher)
829        {
830                if (!idMap.containsKey(ele))
831                {
832                        Set<String> ids = fetcher.fetchID(ele);
833                        idMap.put(ele, ids);
834                }
835                return idMap.get(ele);
836        }
837
838        /**
839         * If a SIF miner wants to tell which essential BioPAX elements mediated this relation, then
840         * they need to override this method and pass the labels of elements.
841         * @return labels of elements to collect publication refs
842         */
843        public String[] getMediatorLabels()
844        {
845                return null;
846        }
847
848        /**
849         * If a SIF miner wants to tell which PhysicalEntity objects acted as source of the relation,
850         * they need to override this method and pass the labels of elements.
851         * @return labels of elements
852         */
853        public String[] getSourcePELabels()
854        {
855                return null;
856        }
857
858        /**
859         * If a SIF miner wants to tell which PhysicalEntity objects acted as source of the relation,
860         * they need to override this method and pass the labels of elements.
861         * @return labels of elements
862         */
863        public String[] getTargetPELabels()
864        {
865                return null;
866        }
867
868        /**
869         * Uses uniprot name or gene symbol as identifier.
870         * @param m current match
871         * @param label label of the related EntityReference in the pattern
872         * @return identifier
873         */
874        public Set<String> getIdentifiers(Match m, String label)
875        {
876                BioPAXElement el = m.get(label, getPattern());
877
878                if (idFetcher != null) return idFetcher.fetchID(el);
879
880                Set<String> set = new HashSet<String>();
881                if (el instanceof ProteinReference)
882                {
883//                      set.add(getUniprotNameForHuman(m, label));
884                        set.add(getGeneSymbol((ProteinReference) el));
885                }
886                else if (el instanceof SmallMoleculeReference)
887                {
888                        set.add(getCompoundName((SmallMoleculeReference) el));
889                }
890
891                return set;
892        }
893
894        /**
895         * Gets the name of the small molecule to use in SIF.
896         * @param smr small molecule ref
897         * @return a name
898         */
899        public String getCompoundName(SmallMoleculeReference smr)
900        {
901                return smr.getDisplayName();
902        }
903}