001package org.biopax.paxtools.pattern;
002
003import org.biopax.paxtools.controller.Cloner;
004import org.biopax.paxtools.controller.Completer;
005import org.biopax.paxtools.controller.SimpleEditorMap;
006import org.biopax.paxtools.io.BioPAXIOHandler;
007import org.biopax.paxtools.io.SimpleIOHandler;
008import org.biopax.paxtools.model.BioPAXElement;
009import org.biopax.paxtools.model.BioPAXLevel;
010import org.biopax.paxtools.model.Model;
011import org.biopax.paxtools.model.level3.*;
012import org.biopax.paxtools.model.level3.Process;
013import org.biopax.paxtools.pattern.util.ProgressWatcher;
014
015import java.io.FileInputStream;
016import java.io.FileNotFoundException;
017import java.io.FileOutputStream;
018import java.util.*;
019
020/**
021 * Searcher for searching a given pattern in a model.
022 *
023 * @author Ozgun Babur
024 */
025public class Searcher
026{
027        /**
028         * Searches the pattern starting from the given match. The first element of the match should be
029         * assigned. Others are optional.
030         * @param m match to start from
031         * @param pattern pattern to search
032         * @return result matches
033         */
034        public static List<Match> search(Match m, Pattern pattern)
035        {
036                assert pattern.getStartingClass().isAssignableFrom(m.get(0).getModelInterface());
037
038                return searchRecursive(m, pattern.getConstraints(), 0);
039        }
040
041        /**
042         * Searches the pattern starting from the given element.
043         * @param ele element to start from
044         * @param pattern pattern to search
045         * @return matching results
046         */
047        public static List<Match> search(BioPAXElement ele, Pattern pattern)
048        {
049                assert pattern.getStartingClass().isAssignableFrom(ele.getModelInterface());
050
051                Match m = new Match(pattern.size());
052                m.set(ele, 0);
053                return search(m, pattern);
054        }
055
056        /**
057         * Continues searching with the mapped constraint at the given index.
058         * @param match match to start from
059         * @param mc mapped constraints of the pattern
060         * @param index index of the current mapped constraint
061         * @return matching results
062         */
063        public static List<Match> searchRecursive(Match match, List<MappedConst> mc, int index) 
064        {
065                List<Match> result = new ArrayList<Match>();
066
067                // debug code
068//              if (match.get(2) != null && match.get(2).getRDFId().equals("http://pid.nci.nih.gov/biopaxpid_39918") &&
069//                      match.get(6) != null && match.get(6).getRDFId().equals("http://pid.nci.nih.gov/biopaxpid_12411"))
070//              {
071//                      System.out.println();
072//              }
073                // debug code
074
075                Constraint con = mc.get(index).getConstr();
076                int[] ind = mc.get(index).getInds();
077                int lastInd = ind[ind.length-1];
078
079                if (con.canGenerate() && match.get(lastInd) == null)
080                {
081                        Collection<BioPAXElement> elements = con.generate(match, ind);
082
083                        for (BioPAXElement ele : elements)
084                        {
085                                match.set(ele, lastInd);
086                                
087                                if (mc.size() == index + 1)
088                                {
089                                        result.add((Match) match.clone());
090                                }
091                                else
092                                {
093                                        result.addAll(searchRecursive(match, mc, index + 1));
094                                }
095                                
096                                match.set(null, lastInd);
097                        }
098                }
099                else
100                {
101                        if (con.satisfies(match, ind))
102                        {
103                                if (mc.size() == index + 1)
104                                {
105                                        result.add((Match) match.clone());
106                                }
107                                else
108                                {
109                                        result.addAll(searchRecursive(match, mc, index + 1));
110                                }
111                        }
112                }
113                return result;
114        }
115
116        /**
117         * Searches the pattern in a given model, but instead of a match map, returns all matches in a
118         * list.
119         * @param model model to search in
120         * @param pattern pattern to search for
121         * @return matching results
122         */
123        public static List<Match> searchPlain(Model model, Pattern pattern)
124        {
125                List<Match> list = new LinkedList<Match>();
126
127                Map<BioPAXElement, List<Match>> map = search(model, pattern);
128                for (List<Match> matches : map.values())
129                {
130                        list.addAll(matches);
131                }
132                return list;
133        }
134
135        /**
136         * Searches the pattern starting from given elements, but instead of a match map, returns all
137         * matches in a list.
138         * @param eles elements to start from
139         * @param pattern pattern to search for
140         * @return matching results
141         */
142        public static List<Match> searchPlain(Collection<? extends BioPAXElement> eles, Pattern pattern)
143        {
144                List<Match> list = new LinkedList<Match>();
145
146                Map<BioPAXElement, List<Match>> map = search(eles, pattern);
147                for (List<Match> matches : map.values())
148                {
149                        list.addAll(matches);
150                }
151                return list;
152        }
153
154        /**
155         * Searches the given pattern in the given model.
156         * @param model model to search in
157         * @param pattern pattern to search for
158         * @return map from starting elements to the list matching results
159         */
160        public static Map<BioPAXElement, List<Match>> search(Model model, Pattern pattern)
161        {
162                return search(model, pattern, null);
163        }
164
165        /**
166         * Searches the given pattern in the given model.
167         * @param model model to search in
168         * @param pattern pattern to search for
169         * @param prg progress watcher to keep track of the progress
170         * @return map from starting elements to the list matching results
171         */
172        public static Map<BioPAXElement, List<Match>> search(Model model, Pattern pattern,
173                ProgressWatcher prg)
174        {
175                Map<BioPAXElement, List<Match>> map = new HashMap<BioPAXElement, List<Match>>();
176
177                Set<? extends BioPAXElement> eles = model.getObjects(pattern.getStartingClass());
178
179                if (prg != null) prg.setTotalTicks(eles.size());
180
181                for (BioPAXElement ele : eles)
182                {
183                        List<Match> matches = search(ele, pattern);
184                        
185                        if (!matches.isEmpty())
186                        {
187                                map.put(ele, matches);
188                        }
189
190                        if (prg != null) prg.tick(1);
191                }
192                return map;
193        }
194
195        /**
196         * Searches the given pattern starting from the given elements.
197         * @param eles elements to start from
198         * @param pattern pattern to search for
199         * @return map from starting element to the matching results
200         */
201        public static Map<BioPAXElement, List<Match>> search(Collection<? extends BioPAXElement> eles,
202                Pattern pattern)
203        {
204                Map<BioPAXElement, List<Match>> map = new HashMap<BioPAXElement, List<Match>>();
205
206                for (BioPAXElement ele : eles)
207                {
208                        if (!pattern.getStartingClass().isAssignableFrom(ele.getModelInterface())) continue;
209                        
210                        List<Match> matches = search(ele, pattern);
211                        
212                        if (!matches.isEmpty())
213                        {
214                                map.put(ele, matches);
215                        }
216                }
217                return map;
218        }
219
220        /**
221         * Searches a model for the given pattern, then collects the specified elements of the matches
222         * and returns.
223         *
224         * @param <T> BioPAX type
225         * @param model model to search in
226         * @param pattern pattern to search for
227         * @param index index of the element in the match to collect
228         * @param c type of the element to collect
229         * @return set of the elements at the specified index of the matching results
230         */
231        public static <T extends BioPAXElement> Set<T> searchAndCollect(
232                Model model, Pattern pattern, int index, Class<T> c)
233        {
234                return searchAndCollect(model.getObjects(pattern.getStartingClass()), pattern, index, c);
235        }
236
237        /**
238         * Searches the given pattern starting from the given elements, then collects the specified
239         * elements of the matches and returns.
240         *
241         * @param <T> BioPAX type
242         * @param eles elements to start from
243         * @param pattern pattern to search for
244         * @param index index of the element in the match to collect
245         * @param c type of the element to collect
246         * @return set of the elements at the specified index of the matching results
247         */
248        public static <T extends BioPAXElement> Set<T> searchAndCollect(
249                Collection<? extends BioPAXElement> eles, Pattern pattern, int index, Class<T> c)
250        {
251                Set<T> set = new HashSet<T>();
252
253                for (Match match : searchPlain(eles, pattern))
254                {
255                        set.add((T) match.get(index));
256                }
257                return set;
258        }
259
260        /**
261         * Searches the given pattern starting from the given element, then collects the specified
262         * elements of the matches and returns.
263         *
264         * @param <T> BioPAX type
265         * @param ele element to start from
266         * @param pattern pattern to search for
267         * @param index index of the element in the match to collect
268         * @param c type of the element to collect
269         * @return set of the elements at the specified index of the matching results
270         */
271        public static <T extends BioPAXElement> Set<T> searchAndCollect(
272                BioPAXElement ele, Pattern pattern, int index, Class<T> c)
273        {
274                Set<T> set = new HashSet<T>();
275
276                for (Match match : search(ele, pattern))
277                {
278                        set.add((T) match.get(index));
279                }
280                return set;
281        }
282
283        /**
284         * Checks if there is any match for the given pattern if search starts from the given element.
285         * @param p pattern to search for
286         * @param ele element to start from
287         * @return true if there is a match
288         */
289        public boolean hasSolution(Pattern p, BioPAXElement ... ele)
290        {
291                Match m = new Match(p.size());
292                for (int i = 0; i < ele.length; i++)
293                {
294                        m.set(ele[i], i);
295                }
296
297                return !search(m, p).isEmpty();
298        }
299
300        /**
301         * Searches a pattern reading the model from the given file, and creates another model that is
302         * excised using the matching patterns.
303         * @param p pattern to search for
304         * @param inFile filename for the model to search in
305         * @param outFile filename for the result model
306         * @throws FileNotFoundException when no file exists
307         */
308        public static void searchInFile(Pattern p, String inFile, String outFile) throws FileNotFoundException
309        {
310                searchInFile(p, inFile, outFile, Integer.MAX_VALUE, Integer.MAX_VALUE);
311        }
312
313        /**
314         * Searches a pattern reading the model from the given file, and creates another model that is
315         * excised using the matching patterns. USers can limit the max number of starting element, and
316         * max number of matches for any starting element. These parameters is good for limiting the
317         * size of the result graph.
318         * @param p pattern to search for
319         * @param inFile filename for the model to search in
320         * @param outFile filename for the result model
321         * @param seedLimit max number of starting elements
322         * @param graphPerSeed max number of matches for a starting element
323         * @throws FileNotFoundException when no file exists
324         */
325        public static void searchInFile(Pattern p, String inFile, String outFile, int seedLimit,
326                int graphPerSeed) throws FileNotFoundException
327        {
328                SimpleIOHandler h = new SimpleIOHandler();
329                Model model = h.convertFromOWL(new FileInputStream(inFile));
330
331                Map<BioPAXElement,List<Match>> matchMap = Searcher.search(model, p);
332
333                System.out.println("matching groups size = " + matchMap.size());
334
335                List<Set<Interaction>> inters = new LinkedList<Set<Interaction>>();
336                Set<Integer> encountered = new HashSet<Integer>();
337
338                Set<BioPAXElement> toExise = new HashSet<BioPAXElement>();
339
340                int seedCounter = 0;
341                for (BioPAXElement ele : matchMap.keySet())
342                {
343                        if (seedCounter >= seedLimit) break;
344
345                        int matchCounter = 0;
346
347                        List<Match> matches = matchMap.get(ele);
348
349                        if (!matches.isEmpty()) seedCounter++;
350
351                        for (Match match : matches)
352                        {
353                                matchCounter++;
354                                
355                                if (matchCounter > graphPerSeed) break;
356
357                                Set<Interaction> ints = getInter(match);
358
359                                toExise.addAll(Arrays.asList(match.getVariables()));
360                                toExise.addAll(ints);
361
362                                Integer hash = hashSum(ints);
363                                if (!encountered.contains(hash))
364                                {
365                                        encountered.add(hash);
366                                        inters.add(ints);
367                                }
368                        }
369                }
370
371                System.out.println("created pathways = " + inters.size());
372
373                Model clonedModel = excise(model, toExise);
374
375                int i = 0;
376                for (Set<Interaction> ints : inters)
377                {
378                        Pathway pathway = clonedModel.addNew(Pathway.class,
379                                System.currentTimeMillis() + "PaxtoolsPatternGeneratedMatch" + (++i));
380
381                        pathway.setDisplayName("Match " + getLeadingZeros(i, inters.size()) + i);
382
383                        for (Interaction anInt : ints)
384                        {
385                                pathway.addPathwayComponent((Process) clonedModel.getByID(anInt.getRDFId()));
386                        }
387                }
388
389                handler.convertToOWL(clonedModel, new FileOutputStream(outFile));
390        }
391
392        /**
393         * Prepares an int for printing with leading zeros for the given size.
394         * @param i the int to prepare
395         * @param size max value for i
396         * @return printable string for i with leading zeros
397         */
398        private static String getLeadingZeros(int i, int size)
399        {
400                assert i <= size;
401                int w1 = (int) Math.floor(Math.log10(size));
402                int w2 = (int) Math.floor(Math.log10(i));
403                
404                String s = "";
405
406                for (int j = w2; j < w1; j++)
407                {
408                        s += "0";
409                }
410                return s;
411        }
412
413        /**
414         * IO handler for reading and writing BioPAX.
415         */
416        private static BioPAXIOHandler handler =  new SimpleIOHandler();
417
418        /**
419         * Editor map to use for excising.
420         */
421        private static final SimpleEditorMap EM = SimpleEditorMap.L3;
422
423        /**
424         * Excises a model to the given elements.
425         * @param model model to excise
426         * @param result elements to excise to
427         * @return excised model
428         */
429        private static Model excise(Model model, Set<BioPAXElement> result)
430        {
431                Completer c = new Completer(EM);
432
433                result = c.complete(result, model);
434
435                Cloner cln = new Cloner(EM, BioPAXLevel.L3.getDefaultFactory());
436
437                return cln.clone(model, result);
438        }
439
440        /**
441         * Gets all interactions in a match.
442         * @param match match to search
443         * @return all interaction in the match
444         */
445        private static Set<Interaction> getInter(Match match)
446        {
447                Set<Interaction> set = new HashSet<Interaction>();
448                for (BioPAXElement ele : match.getVariables())
449                {
450                        if (ele instanceof Interaction) 
451                        {
452                                set.add((Interaction) ele);
453                                addControlsRecursive((Interaction) ele, set);
454                        }
455                }
456                return set;
457        }
458
459        /**
460         * Adds controls of the given interactions recursively to the given set.
461         * @param inter interaction to add its controls
462         * @param set set to add to
463         */
464        private static void addControlsRecursive(Interaction inter, Set<Interaction> set)
465        {
466                for (Control ctrl : inter.getControlledOf())
467                {
468                        set.add(ctrl);
469                        addControlsRecursive(ctrl, set);
470                }
471        }
472
473        /**
474         * Creates a hash code for a set of interactions.
475         * @param set interactions
476         * @return sum of hashes
477         */
478        private static Integer hashSum(Set<Interaction> set)
479        {
480                int x = 0;
481                for (Interaction inter : set)
482                {
483                        x += inter.hashCode();
484                }
485                return x;
486        }
487}