001package org.biopax.paxtools.pattern.util;
002
003import org.biopax.paxtools.model.level3.*;
004
005import java.io.*;
006import java.util.*;
007
008/**
009 * A blacklist is used for not using ubiquitous small molecules in patterns. This class knows how to
010 * read itself from an InputStream, and can write itself to an OutputStream.
011 *
012 * @author Ozgun Babur
013 */
014public class Blacklist
015{
016        /**
017         * Holds ID of blacklisted small molecule's reference. Maps them to the context of ubiquity.
018         * When the context is both INPUT and OUTPUT, it is represented with a null value.
019         */
020        private Map<String, RelType> context;
021
022        /**
023         * Maps IDs of blacklisted small molecule references to their ubiquity scores.
024         */
025        private Map<String, Integer> score;
026
027        /**
028         * The deliminator string in the data.
029         */
030        private static final String DELIM = "\t";
031
032        /**
033         * Constructor for a blank blacklist.
034         */
035        public Blacklist()
036        {
037                context = new HashMap<String, RelType>();
038                score = new HashMap<String, Integer>();
039        }
040
041        /**
042         * Constructor with resource file name.
043         *
044         * @param filename file path to import the blacklist entries from
045         */
046        public Blacklist(String filename)
047        {
048                this();
049                load(filename);
050        }
051
052        /**
053         * Constructor with resource input stream.
054         *
055         * @param is input stream to read/init the blacklist from
056         */
057        public Blacklist(InputStream is)
058        {
059                this();
060                load(is);
061        }
062
063        //----- Section: Input / Output ---------------------------------------------------------------|
064
065        /**
066         * Reads data from the given file.
067         */
068        private void load(String filename)
069        {
070                try
071                {
072                        load(new FileInputStream(filename));
073                }
074                catch (FileNotFoundException e)
075                {
076                        e.printStackTrace();
077                }
078        }
079
080        /**
081         * Reads data from the input stream and loads itself.
082         */
083        private void load(InputStream is)
084        {
085                try
086                {
087                        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
088
089                        for (String line = reader.readLine(); line != null; line = reader.readLine())
090                        {
091                                String[] tok = line.split(DELIM);
092                                if (tok.length >= 3)
093                                {
094                                        addEntry(tok[0], Integer.parseInt(tok[1]), convertContext(tok[2]));
095                                }
096                        }
097
098                        reader.close();
099                }
100                catch (Exception e)
101                {
102                        e.printStackTrace();
103                        context = null;
104                        score = null;
105                }
106        }
107
108        /**
109         * Adds a new blacklisted ID.
110         * @param id ID of the blacklisted molecule
111         * @param score the ubiquity score
112         * @param context context of ubiquity
113         */
114        public void addEntry(String id, int score, RelType context)
115        {
116                this.score.put(id, score);
117                this.context.put(id, context);
118        }
119
120        /**
121         * Gets the IDs of the blacklisted molecules.
122         *
123         * @return IDs
124         */
125        public Set<String> getListed()
126        {
127                return score.keySet();
128        }
129
130        /**
131         * Dumps data to the given file.
132         *
133         * @param filename output file name
134         */
135        public void write(String filename)
136        {
137                try
138                {
139                        write(new FileOutputStream(filename));
140                }
141                catch (FileNotFoundException e)
142                {
143                        e.printStackTrace();
144                }
145        }
146
147        /**
148         * Dumps data to the given output stream.
149         *
150         * @param os output stream
151         */
152        public void write(OutputStream os)
153        {
154                List<String> ids = new ArrayList<String>(score.keySet());
155                final Map<String, Integer> score = this.score;
156                Collections.sort(ids, new Comparator<String>()
157                {
158                        @Override
159                        public int compare(String o1, String o2)
160                        {
161                                return score.get(o2).compareTo(score.get(o1));
162                        }
163                });
164
165                try
166                {
167                        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os));
168
169                        boolean notFirst = false;
170
171                        for (String id : ids)
172                        {
173                                if (notFirst) writer.write("\n");
174                                else notFirst = true;
175
176                                writer.write(id + DELIM + score.get(id) + DELIM + convertContext(context.get(id)));
177                        }
178
179                        writer.close();
180                }
181                catch (IOException e)
182                {
183                        e.printStackTrace();
184                }
185        }
186
187        /**
188         * Converts enum context to text.
189         * @param type context
190         * @return text value
191         */
192        private String convertContext(RelType type)
193        {
194                if (type == null) return "B";
195
196                switch (type)
197                {
198                        case INPUT: return "I";
199                        case OUTPUT: return "O";
200                        default: return "B";
201                }
202        }
203
204        /**
205         * Converts text context to enum.
206         * @param type context
207         * @return enum value
208         */
209        private RelType convertContext(String type)
210        {
211                if (type.equals("I")) return RelType.INPUT;
212                if (type.equals("O")) return RelType.OUTPUT;
213                if (type.equals("B")) return null;
214                throw new IllegalArgumentException("Unknown context: " + type);
215        }
216
217        // --------- Section: Accessory methods -------------------------------------------------------|
218
219        /**
220         * Gets the subset with the least score.
221         */
222        private Set<String> getLeastUbique(Collection<String> ids)
223        {
224                Set<String> select = new HashSet<String>();
225
226                int s = getLeastScore(ids);
227
228                for (String id : ids)
229                {
230                        if (score.get(id) == s) select.add(id);
231                }
232
233                return select;
234        }
235
236        /**
237         * Gets the least score of the given ids.
238         */
239        private int getLeastScore(Collection<String> ids)
240        {
241                int s = Integer.MAX_VALUE;
242
243                for (String id : ids)
244                {
245                        if (score.get(id) < s) s = score.get(id);
246                }
247
248                return s;
249        }
250
251        /**
252         * Gets the context of the ubiquity of the ID. Be careful with the result. If the result is
253         * null, then either the ID may not be ubique, or the ID may be ubique without a context (which
254         * means in both contexts).
255         * @param id ID to check
256         * @return context of ubiquity
257         */
258        public RelType getContext(String id)
259        {
260                return context.get(id);
261        }
262
263        /**
264         * Checks if the given ID is blacklisted in at least one context.
265         */
266        private boolean isUbique(String id)
267        {
268                return isUbique(id, null);
269        }
270
271        /**
272         * Checks if the given ID is blacklisted in both contexts together.
273         */
274        private boolean isUbiqueInBothContexts(String id)
275        {
276                return context.containsKey(id) && context.get(id) == null;
277        }
278
279        /**
280         * Checks if the given ID is blacklisted in the given context.
281         */
282        private boolean isUbique(String id, RelType context)
283        {
284                if (context == null) return this.context.containsKey(id);
285
286                if (!isUbique(id)) return false;
287
288                RelType ctx = this.context.get(id);
289                return ctx == null || ctx.equals(context);
290        }
291
292        /**
293         * Checks if the given entity is blacklisted in at least one context.
294         *
295         * @param pe physical entity BioPAX object
296         * @return true/false
297         */
298        public boolean isUbique(PhysicalEntity pe)
299        {
300                String id = getSMRID(pe);
301                return id != null && isUbique(id);
302        }
303
304        /**
305         * Checks if the given entity is blacklisted in both context together.
306         *
307         * @param pe physical entity BioPAX object
308         * @return true/false
309         */
310        public boolean isUbiqueInBothContexts(PhysicalEntity pe)
311        {
312                String id = getSMRID(pe);
313                return id != null && isUbiqueInBothContexts(id);
314        }
315
316        /**
317         * Checks if the given entity is blacklisted for the given Conversion assuming the Conversion
318         * flows towards the given direction, and the entity is in given context.
319         *
320         * @param pe physical entity BioPAX object
321         * @param conv conversion interaction (BioPAX)
322         * @param dir conversion direction
323         * @param context relationship type - context
324         * @return true/false
325         */
326        public boolean isUbique(PhysicalEntity pe, Conversion conv, ConversionDirectionType dir,
327                RelType context)
328        {
329                String id = getSMRID(pe);
330                if (id == null) return false;
331
332                if (dir == null)
333                        throw new IllegalArgumentException("The conversion direction has to be specified.");
334
335                if (context == null)
336                        throw new IllegalArgumentException("The context has to be only one type.");
337
338                Set<PhysicalEntity> parts;
339
340                if (dir == ConversionDirectionType.REVERSIBLE)
341                {
342                        if (conv.getLeft().contains(pe)) parts = conv.getLeft();
343                        else if (conv.getRight().contains(pe)) parts = conv.getRight();
344                        else throw new IllegalArgumentException("The PhysicalEntity has to be at least one " +
345                                        "side of the Conversion");
346                }
347                else
348                {
349                        parts = dir == ConversionDirectionType.LEFT_TO_RIGHT ?
350                                context == RelType.INPUT ? conv.getLeft() : conv.getRight() :
351                                context == RelType.OUTPUT ? conv.getLeft() : conv.getRight();
352                }
353
354                // if the Conversion direction is reversible, then don't mind the current context
355                if (dir == ConversionDirectionType.REVERSIBLE)
356                        return getUbiques(parts, null).contains(pe);
357                else return getUbiques(parts, context).contains(pe);
358        }
359
360        /**
361         * Gets the ID of the reference of the given entity if it is a small molecule.
362         */
363        private String getSMRID(PhysicalEntity pe)
364        {
365                if (pe instanceof SmallMolecule)
366                {
367                        EntityReference er = ((SmallMolecule) pe).getEntityReference();
368                        if (er != null) return er.getRDFId();
369                }
370                return null;
371        }
372
373        /**
374         * Gets the ubiquitous small molecules among the given set and in the given context. It is
375         * assumed that the given set is either left or right of a Conversion. If there is no
376         * non-ubiquitous element in the set, then the least ubique(s) are removed from the result.
377         * @param entities left or right of a conversion
378         * @param context are these entities input or output
379         * @return ubiquitous small molecules in the given context
380         */
381        public Collection<SmallMolecule> getUbiques(Set<PhysicalEntity> entities, RelType context)
382        {
383                Map<String, SmallMolecule> ubiques = new HashMap<String, SmallMolecule>();
384                boolean allUbiques = true;
385
386                for (PhysicalEntity pe : entities)
387                {
388                        if (pe instanceof SmallMolecule)
389                        {
390                                EntityReference er = ((SmallMolecule) pe).getEntityReference();
391
392                                if (er != null && isUbique(er.getRDFId(), context))
393                                {
394                                        ubiques.put(er.getRDFId(), (SmallMolecule) pe);
395                                }
396                                else
397                                {
398                                        allUbiques = false;
399                                }
400                        }
401                        else allUbiques = false;
402                }
403
404                if (allUbiques && !ubiques.isEmpty())
405                {
406                        Set<String> least = getLeastUbique(ubiques.keySet());
407                        for (String id : least)
408                        {
409                                ubiques.remove(id);
410                        }
411                }
412
413                return ubiques.values();
414        }
415
416        /**
417         * Gets the non-ubiquitous physical entities in the given set and in the given context. It is
418         * assumed that the given set is either left or right of a Conversion. If there is no
419         * non-ubiquitous element in the set, then the least ubique(s) are added to the result.
420         * @param entities left or right of a conversion
421         * @param ctx are these entities input or output
422         * @return non-ubiquitous physical entities in the given context
423         */
424        public Set<PhysicalEntity> getNonUbiques(Set<PhysicalEntity> entities, RelType ctx)
425        {
426                Collection<SmallMolecule> ubiques = getUbiques(entities, ctx);
427                if (ubiques.isEmpty()) return entities;
428
429                Set<PhysicalEntity> result = new HashSet<PhysicalEntity>(entities);
430                result.removeAll(ubiques);
431                return result;
432        }
433
434        public Set getNonUbiqueObjects(Set objects)
435        {
436                Set result = new HashSet();
437                for (Object o : objects)
438                {
439                        if (o instanceof SmallMolecule && !isUbique((SmallMolecule) o)) result.add(o);
440                }
441                return result;
442        }
443}