001package org.biopax.paxtools.pattern; 002 003import org.biopax.paxtools.controller.Cloner; 004import org.biopax.paxtools.controller.Completer; 005import org.biopax.paxtools.controller.SimpleEditorMap; 006import org.biopax.paxtools.io.BioPAXIOHandler; 007import org.biopax.paxtools.io.SimpleIOHandler; 008import org.biopax.paxtools.model.BioPAXElement; 009import org.biopax.paxtools.model.BioPAXLevel; 010import org.biopax.paxtools.model.Model; 011import org.biopax.paxtools.model.level3.*; 012import org.biopax.paxtools.model.level3.Process; 013import org.biopax.paxtools.pattern.util.ProgressWatcher; 014 015import java.io.FileInputStream; 016import java.io.FileNotFoundException; 017import java.io.FileOutputStream; 018import java.util.*; 019 020/** 021 * Searcher for searching a given pattern in a model. 022 * 023 * @author Ozgun Babur 024 */ 025public class Searcher 026{ 027 /** 028 * Searches the pattern starting from the given match. The first element of the match should be 029 * assigned. Others are optional. 030 * @param m match to start from 031 * @param pattern pattern to search 032 * @return result matches 033 */ 034 public static List<Match> search(Match m, Pattern pattern) 035 { 036 assert pattern.getStartingClass().isAssignableFrom(m.get(0).getModelInterface()); 037 038 return searchRecursive(m, pattern.getConstraints(), 0); 039 } 040 041 /** 042 * Searches the pattern starting from the given element. 043 * @param ele element to start from 044 * @param pattern pattern to search 045 * @return matching results 046 */ 047 public static List<Match> search(BioPAXElement ele, Pattern pattern) 048 { 049 assert pattern.getStartingClass().isAssignableFrom(ele.getModelInterface()); 050 051 Match m = new Match(pattern.size()); 052 m.set(ele, 0); 053 return search(m, pattern); 054 } 055 056 /** 057 * Continues searching with the mapped constraint at the given index. 058 * @param match match to start from 059 * @param mc mapped constraints of the pattern 060 * @param index index of the current mapped constraint 061 * @return matching results 062 */ 063 public static List<Match> searchRecursive(Match match, List<MappedConst> mc, int index) 064 { 065 List<Match> result = new ArrayList<Match>(); 066 067 // debug code 068// if (match.get(2) != null && match.get(2).getRDFId().equals("http://pid.nci.nih.gov/biopaxpid_39918") && 069// match.get(6) != null && match.get(6).getRDFId().equals("http://pid.nci.nih.gov/biopaxpid_12411")) 070// { 071// System.out.println(); 072// } 073 // debug code 074 075 Constraint con = mc.get(index).getConstr(); 076 int[] ind = mc.get(index).getInds(); 077 int lastInd = ind[ind.length-1]; 078 079 if (con.canGenerate() && match.get(lastInd) == null) 080 { 081 Collection<BioPAXElement> elements = con.generate(match, ind); 082 083 for (BioPAXElement ele : elements) 084 { 085 match.set(ele, lastInd); 086 087 if (mc.size() == index + 1) 088 { 089 result.add((Match) match.clone()); 090 } 091 else 092 { 093 result.addAll(searchRecursive(match, mc, index + 1)); 094 } 095 096 match.set(null, lastInd); 097 } 098 } 099 else 100 { 101 if (con.satisfies(match, ind)) 102 { 103 if (mc.size() == index + 1) 104 { 105 result.add((Match) match.clone()); 106 } 107 else 108 { 109 result.addAll(searchRecursive(match, mc, index + 1)); 110 } 111 } 112 } 113 return result; 114 } 115 116 /** 117 * Searches the pattern in a given model, but instead of a match map, returns all matches in a 118 * list. 119 * @param model model to search in 120 * @param pattern pattern to search for 121 * @return matching results 122 */ 123 public static List<Match> searchPlain(Model model, Pattern pattern) 124 { 125 List<Match> list = new LinkedList<Match>(); 126 127 Map<BioPAXElement, List<Match>> map = search(model, pattern); 128 for (List<Match> matches : map.values()) 129 { 130 list.addAll(matches); 131 } 132 return list; 133 } 134 135 /** 136 * Searches the pattern starting from given elements, but instead of a match map, returns all 137 * matches in a list. 138 * @param eles elements to start from 139 * @param pattern pattern to search for 140 * @return matching results 141 */ 142 public static List<Match> searchPlain(Collection<? extends BioPAXElement> eles, Pattern pattern) 143 { 144 List<Match> list = new LinkedList<Match>(); 145 146 Map<BioPAXElement, List<Match>> map = search(eles, pattern); 147 for (List<Match> matches : map.values()) 148 { 149 list.addAll(matches); 150 } 151 return list; 152 } 153 154 /** 155 * Searches the given pattern in the given model. 156 * @param model model to search in 157 * @param pattern pattern to search for 158 * @return map from starting elements to the list matching results 159 */ 160 public static Map<BioPAXElement, List<Match>> search(Model model, Pattern pattern) 161 { 162 return search(model, pattern, null); 163 } 164 165 /** 166 * Searches the given pattern in the given model. 167 * @param model model to search in 168 * @param pattern pattern to search for 169 * @param prg progress watcher to keep track of the progress 170 * @return map from starting elements to the list matching results 171 */ 172 public static Map<BioPAXElement, List<Match>> search(Model model, Pattern pattern, 173 ProgressWatcher prg) 174 { 175 Map<BioPAXElement, List<Match>> map = new HashMap<BioPAXElement, List<Match>>(); 176 177 Set<? extends BioPAXElement> eles = model.getObjects(pattern.getStartingClass()); 178 179 if (prg != null) prg.setTotalTicks(eles.size()); 180 181 for (BioPAXElement ele : eles) 182 { 183 List<Match> matches = search(ele, pattern); 184 185 if (!matches.isEmpty()) 186 { 187 map.put(ele, matches); 188 } 189 190 if (prg != null) prg.tick(1); 191 } 192 return map; 193 } 194 195 /** 196 * Searches the given pattern starting from the given elements. 197 * @param eles elements to start from 198 * @param pattern pattern to search for 199 * @return map from starting element to the matching results 200 */ 201 public static Map<BioPAXElement, List<Match>> search(Collection<? extends BioPAXElement> eles, 202 Pattern pattern) 203 { 204 Map<BioPAXElement, List<Match>> map = new HashMap<BioPAXElement, List<Match>>(); 205 206 for (BioPAXElement ele : eles) 207 { 208 if (!pattern.getStartingClass().isAssignableFrom(ele.getModelInterface())) continue; 209 210 List<Match> matches = search(ele, pattern); 211 212 if (!matches.isEmpty()) 213 { 214 map.put(ele, matches); 215 } 216 } 217 return map; 218 } 219 220 /** 221 * Searches a model for the given pattern, then collects the specified elements of the matches 222 * and returns. 223 * 224 * @param <T> BioPAX type 225 * @param model model to search in 226 * @param pattern pattern to search for 227 * @param index index of the element in the match to collect 228 * @param c type of the element to collect 229 * @return set of the elements at the specified index of the matching results 230 */ 231 public static <T extends BioPAXElement> Set<T> searchAndCollect( 232 Model model, Pattern pattern, int index, Class<T> c) 233 { 234 return searchAndCollect(model.getObjects(pattern.getStartingClass()), pattern, index, c); 235 } 236 237 /** 238 * Searches the given pattern starting from the given elements, then collects the specified 239 * elements of the matches and returns. 240 * 241 * @param <T> BioPAX type 242 * @param eles elements to start from 243 * @param pattern pattern to search for 244 * @param index index of the element in the match to collect 245 * @param c type of the element to collect 246 * @return set of the elements at the specified index of the matching results 247 */ 248 public static <T extends BioPAXElement> Set<T> searchAndCollect( 249 Collection<? extends BioPAXElement> eles, Pattern pattern, int index, Class<T> c) 250 { 251 Set<T> set = new HashSet<T>(); 252 253 for (Match match : searchPlain(eles, pattern)) 254 { 255 set.add((T) match.get(index)); 256 } 257 return set; 258 } 259 260 /** 261 * Searches the given pattern starting from the given element, then collects the specified 262 * elements of the matches and returns. 263 * 264 * @param <T> BioPAX type 265 * @param ele element to start from 266 * @param pattern pattern to search for 267 * @param index index of the element in the match to collect 268 * @param c type of the element to collect 269 * @return set of the elements at the specified index of the matching results 270 */ 271 public static <T extends BioPAXElement> Set<T> searchAndCollect( 272 BioPAXElement ele, Pattern pattern, int index, Class<T> c) 273 { 274 Set<T> set = new HashSet<T>(); 275 276 for (Match match : search(ele, pattern)) 277 { 278 set.add((T) match.get(index)); 279 } 280 return set; 281 } 282 283 /** 284 * Checks if there is any match for the given pattern if search starts from the given element. 285 * @param p pattern to search for 286 * @param ele element to start from 287 * @return true if there is a match 288 */ 289 public boolean hasSolution(Pattern p, BioPAXElement ... ele) 290 { 291 Match m = new Match(p.size()); 292 for (int i = 0; i < ele.length; i++) 293 { 294 m.set(ele[i], i); 295 } 296 297 return !search(m, p).isEmpty(); 298 } 299 300 /** 301 * Searches a pattern reading the model from the given file, and creates another model that is 302 * excised using the matching patterns. 303 * @param p pattern to search for 304 * @param inFile filename for the model to search in 305 * @param outFile filename for the result model 306 * @throws FileNotFoundException when no file exists 307 */ 308 public static void searchInFile(Pattern p, String inFile, String outFile) throws FileNotFoundException 309 { 310 searchInFile(p, inFile, outFile, Integer.MAX_VALUE, Integer.MAX_VALUE); 311 } 312 313 /** 314 * Searches a pattern reading the model from the given file, and creates another model that is 315 * excised using the matching patterns. USers can limit the max number of starting element, and 316 * max number of matches for any starting element. These parameters is good for limiting the 317 * size of the result graph. 318 * @param p pattern to search for 319 * @param inFile filename for the model to search in 320 * @param outFile filename for the result model 321 * @param seedLimit max number of starting elements 322 * @param graphPerSeed max number of matches for a starting element 323 * @throws FileNotFoundException when no file exists 324 */ 325 public static void searchInFile(Pattern p, String inFile, String outFile, int seedLimit, 326 int graphPerSeed) throws FileNotFoundException 327 { 328 SimpleIOHandler h = new SimpleIOHandler(); 329 Model model = h.convertFromOWL(new FileInputStream(inFile)); 330 331 Map<BioPAXElement,List<Match>> matchMap = Searcher.search(model, p); 332 333 System.out.println("matching groups size = " + matchMap.size()); 334 335 List<Set<Interaction>> inters = new LinkedList<Set<Interaction>>(); 336 Set<Integer> encountered = new HashSet<Integer>(); 337 338 Set<BioPAXElement> toExise = new HashSet<BioPAXElement>(); 339 340 int seedCounter = 0; 341 for (BioPAXElement ele : matchMap.keySet()) 342 { 343 if (seedCounter >= seedLimit) break; 344 345 int matchCounter = 0; 346 347 List<Match> matches = matchMap.get(ele); 348 349 if (!matches.isEmpty()) seedCounter++; 350 351 for (Match match : matches) 352 { 353 matchCounter++; 354 355 if (matchCounter > graphPerSeed) break; 356 357 Set<Interaction> ints = getInter(match); 358 359 toExise.addAll(Arrays.asList(match.getVariables())); 360 toExise.addAll(ints); 361 362 Integer hash = hashSum(ints); 363 if (!encountered.contains(hash)) 364 { 365 encountered.add(hash); 366 inters.add(ints); 367 } 368 } 369 } 370 371 System.out.println("created pathways = " + inters.size()); 372 373 Model clonedModel = excise(model, toExise); 374 375 int i = 0; 376 for (Set<Interaction> ints : inters) 377 { 378 Pathway pathway = clonedModel.addNew(Pathway.class, 379 System.currentTimeMillis() + "PaxtoolsPatternGeneratedMatch" + (++i)); 380 381 pathway.setDisplayName("Match " + getLeadingZeros(i, inters.size()) + i); 382 383 for (Interaction anInt : ints) 384 { 385 pathway.addPathwayComponent((Process) clonedModel.getByID(anInt.getRDFId())); 386 } 387 } 388 389 handler.convertToOWL(clonedModel, new FileOutputStream(outFile)); 390 } 391 392 /** 393 * Prepares an int for printing with leading zeros for the given size. 394 * @param i the int to prepare 395 * @param size max value for i 396 * @return printable string for i with leading zeros 397 */ 398 private static String getLeadingZeros(int i, int size) 399 { 400 assert i <= size; 401 int w1 = (int) Math.floor(Math.log10(size)); 402 int w2 = (int) Math.floor(Math.log10(i)); 403 404 String s = ""; 405 406 for (int j = w2; j < w1; j++) 407 { 408 s += "0"; 409 } 410 return s; 411 } 412 413 /** 414 * IO handler for reading and writing BioPAX. 415 */ 416 private static BioPAXIOHandler handler = new SimpleIOHandler(); 417 418 /** 419 * Editor map to use for excising. 420 */ 421 private static final SimpleEditorMap EM = SimpleEditorMap.L3; 422 423 /** 424 * Excises a model to the given elements. 425 * @param model model to excise 426 * @param result elements to excise to 427 * @return excised model 428 */ 429 private static Model excise(Model model, Set<BioPAXElement> result) 430 { 431 Completer c = new Completer(EM); 432 433 result = c.complete(result, model); 434 435 Cloner cln = new Cloner(EM, BioPAXLevel.L3.getDefaultFactory()); 436 437 return cln.clone(model, result); 438 } 439 440 /** 441 * Gets all interactions in a match. 442 * @param match match to search 443 * @return all interaction in the match 444 */ 445 private static Set<Interaction> getInter(Match match) 446 { 447 Set<Interaction> set = new HashSet<Interaction>(); 448 for (BioPAXElement ele : match.getVariables()) 449 { 450 if (ele instanceof Interaction) 451 { 452 set.add((Interaction) ele); 453 addControlsRecursive((Interaction) ele, set); 454 } 455 } 456 return set; 457 } 458 459 /** 460 * Adds controls of the given interactions recursively to the given set. 461 * @param inter interaction to add its controls 462 * @param set set to add to 463 */ 464 private static void addControlsRecursive(Interaction inter, Set<Interaction> set) 465 { 466 for (Control ctrl : inter.getControlledOf()) 467 { 468 set.add(ctrl); 469 addControlsRecursive(ctrl, set); 470 } 471 } 472 473 /** 474 * Creates a hash code for a set of interactions. 475 * @param set interactions 476 * @return sum of hashes 477 */ 478 private static Integer hashSum(Set<Interaction> set) 479 { 480 int x = 0; 481 for (Interaction inter : set) 482 { 483 x += inter.hashCode(); 484 } 485 return x; 486 } 487}