001package org.biopax.paxtools.pattern.util; 002 003import org.biopax.paxtools.model.level3.*; 004 005import java.io.*; 006import java.util.*; 007 008/** 009 * A blacklist is used for not using ubiquitous small molecules in patterns. This class knows how to 010 * read itself from an InputStream, and can write itself to an OutputStream. 011 * 012 * @author Ozgun Babur 013 */ 014public class Blacklist 015{ 016 /** 017 * Holds ID of blacklisted small molecule's reference. Maps them to the context of ubiquity. 018 * When the context is both INPUT and OUTPUT, it is represented with a null value. 019 */ 020 private Map<String, RelType> context; 021 022 /** 023 * Maps IDs of blacklisted small molecule references to their ubiquity scores. 024 */ 025 private Map<String, Integer> score; 026 027 /** 028 * The deliminator string in the data. 029 */ 030 private static final String DELIM = "\t"; 031 032 /** 033 * Constructor for a blank blacklist. 034 */ 035 public Blacklist() 036 { 037 context = new HashMap<String, RelType>(); 038 score = new HashMap<String, Integer>(); 039 } 040 041 /** 042 * Constructor with resource file name. 043 * 044 * @param filename file path to import the blacklist entries from 045 */ 046 public Blacklist(String filename) 047 { 048 this(); 049 load(filename); 050 } 051 052 /** 053 * Constructor with resource input stream. 054 * 055 * @param is input stream to read/init the blacklist from 056 */ 057 public Blacklist(InputStream is) 058 { 059 this(); 060 load(is); 061 } 062 063 //----- Section: Input / Output ---------------------------------------------------------------| 064 065 /** 066 * Reads data from the given file. 067 */ 068 private void load(String filename) 069 { 070 try 071 { 072 load(new FileInputStream(filename)); 073 } 074 catch (FileNotFoundException e) 075 { 076 e.printStackTrace(); 077 } 078 } 079 080 /** 081 * Reads data from the input stream and loads itself. 082 */ 083 private void load(InputStream is) 084 { 085 try 086 { 087 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 088 089 for (String line = reader.readLine(); line != null; line = reader.readLine()) 090 { 091 String[] tok = line.split(DELIM); 092 if (tok.length >= 3) 093 { 094 addEntry(tok[0], Integer.parseInt(tok[1]), convertContext(tok[2])); 095 } 096 } 097 098 reader.close(); 099 } 100 catch (Exception e) 101 { 102 e.printStackTrace(); 103 context = null; 104 score = null; 105 } 106 } 107 108 /** 109 * Adds a new blacklisted ID. 110 * @param id ID of the blacklisted molecule 111 * @param score the ubiquity score 112 * @param context context of ubiquity 113 */ 114 public void addEntry(String id, int score, RelType context) 115 { 116 this.score.put(id, score); 117 this.context.put(id, context); 118 } 119 120 /** 121 * Gets the IDs of the blacklisted molecules. 122 * 123 * @return IDs 124 */ 125 public Set<String> getListed() 126 { 127 return score.keySet(); 128 } 129 130 /** 131 * Dumps data to the given file. 132 * 133 * @param filename output file name 134 */ 135 public void write(String filename) 136 { 137 try 138 { 139 write(new FileOutputStream(filename)); 140 } 141 catch (FileNotFoundException e) 142 { 143 e.printStackTrace(); 144 } 145 } 146 147 /** 148 * Dumps data to the given output stream. 149 * 150 * @param os output stream 151 */ 152 public void write(OutputStream os) 153 { 154 List<String> ids = new ArrayList<String>(score.keySet()); 155 final Map<String, Integer> score = this.score; 156 Collections.sort(ids, new Comparator<String>() 157 { 158 @Override 159 public int compare(String o1, String o2) 160 { 161 return score.get(o2).compareTo(score.get(o1)); 162 } 163 }); 164 165 try 166 { 167 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); 168 169 boolean notFirst = false; 170 171 for (String id : ids) 172 { 173 if (notFirst) writer.write("\n"); 174 else notFirst = true; 175 176 writer.write(id + DELIM + score.get(id) + DELIM + convertContext(context.get(id))); 177 } 178 179 writer.close(); 180 } 181 catch (IOException e) 182 { 183 e.printStackTrace(); 184 } 185 } 186 187 /** 188 * Converts enum context to text. 189 * @param type context 190 * @return text value 191 */ 192 private String convertContext(RelType type) 193 { 194 if (type == null) return "B"; 195 196 switch (type) 197 { 198 case INPUT: return "I"; 199 case OUTPUT: return "O"; 200 default: return "B"; 201 } 202 } 203 204 /** 205 * Converts text context to enum. 206 * @param type context 207 * @return enum value 208 */ 209 private RelType convertContext(String type) 210 { 211 if (type.equals("I")) return RelType.INPUT; 212 if (type.equals("O")) return RelType.OUTPUT; 213 if (type.equals("B")) return null; 214 throw new IllegalArgumentException("Unknown context: " + type); 215 } 216 217 // --------- Section: Accessory methods -------------------------------------------------------| 218 219 /** 220 * Gets the subset with the least score. 221 */ 222 private Set<String> getLeastUbique(Collection<String> ids) 223 { 224 Set<String> select = new HashSet<String>(); 225 226 int s = getLeastScore(ids); 227 228 for (String id : ids) 229 { 230 if (score.get(id) == s) select.add(id); 231 } 232 233 return select; 234 } 235 236 /** 237 * Gets the least score of the given ids. 238 */ 239 private int getLeastScore(Collection<String> ids) 240 { 241 int s = Integer.MAX_VALUE; 242 243 for (String id : ids) 244 { 245 if (score.get(id) < s) s = score.get(id); 246 } 247 248 return s; 249 } 250 251 /** 252 * Gets the context of the ubiquity of the ID. Be careful with the result. If the result is 253 * null, then either the ID may not be ubique, or the ID may be ubique without a context (which 254 * means in both contexts). 255 * @param id ID to check 256 * @return context of ubiquity 257 */ 258 public RelType getContext(String id) 259 { 260 return context.get(id); 261 } 262 263 /** 264 * Checks if the given ID is blacklisted in at least one context. 265 */ 266 private boolean isUbique(String id) 267 { 268 return isUbique(id, null); 269 } 270 271 /** 272 * Checks if the given ID is blacklisted in both contexts together. 273 */ 274 private boolean isUbiqueInBothContexts(String id) 275 { 276 return context.containsKey(id) && context.get(id) == null; 277 } 278 279 /** 280 * Checks if the given ID is blacklisted in the given context. 281 */ 282 private boolean isUbique(String id, RelType context) 283 { 284 if (context == null) return this.context.containsKey(id); 285 286 if (!isUbique(id)) return false; 287 288 RelType ctx = this.context.get(id); 289 return ctx == null || ctx.equals(context); 290 } 291 292 /** 293 * Checks if the given entity is blacklisted in at least one context. 294 * 295 * @param pe physical entity BioPAX object 296 * @return true/false 297 */ 298 public boolean isUbique(PhysicalEntity pe) 299 { 300 String id = getSMRID(pe); 301 return id != null && isUbique(id); 302 } 303 304 /** 305 * Checks if the given entity is blacklisted in both context together. 306 * 307 * @param pe physical entity BioPAX object 308 * @return true/false 309 */ 310 public boolean isUbiqueInBothContexts(PhysicalEntity pe) 311 { 312 String id = getSMRID(pe); 313 return id != null && isUbiqueInBothContexts(id); 314 } 315 316 /** 317 * Checks if the given entity is blacklisted for the given Conversion assuming the Conversion 318 * flows towards the given direction, and the entity is in given context. 319 * 320 * @param pe physical entity BioPAX object 321 * @param conv conversion interaction (BioPAX) 322 * @param dir conversion direction 323 * @param context relationship type - context 324 * @return true/false 325 */ 326 public boolean isUbique(PhysicalEntity pe, Conversion conv, ConversionDirectionType dir, 327 RelType context) 328 { 329 String id = getSMRID(pe); 330 if (id == null) return false; 331 332 if (dir == null) 333 throw new IllegalArgumentException("The conversion direction has to be specified."); 334 335 if (context == null) 336 throw new IllegalArgumentException("The context has to be only one type."); 337 338 Set<PhysicalEntity> parts; 339 340 if (dir == ConversionDirectionType.REVERSIBLE) 341 { 342 if (conv.getLeft().contains(pe)) parts = conv.getLeft(); 343 else if (conv.getRight().contains(pe)) parts = conv.getRight(); 344 else throw new IllegalArgumentException("The PhysicalEntity has to be at least one " + 345 "side of the Conversion"); 346 } 347 else 348 { 349 parts = dir == ConversionDirectionType.LEFT_TO_RIGHT ? 350 context == RelType.INPUT ? conv.getLeft() : conv.getRight() : 351 context == RelType.OUTPUT ? conv.getLeft() : conv.getRight(); 352 } 353 354 // if the Conversion direction is reversible, then don't mind the current context 355 if (dir == ConversionDirectionType.REVERSIBLE) 356 return getUbiques(parts, null).contains(pe); 357 else return getUbiques(parts, context).contains(pe); 358 } 359 360 /** 361 * Gets the ID of the reference of the given entity if it is a small molecule. 362 */ 363 private String getSMRID(PhysicalEntity pe) 364 { 365 if (pe instanceof SmallMolecule) 366 { 367 EntityReference er = ((SmallMolecule) pe).getEntityReference(); 368 if (er != null) return er.getRDFId(); 369 } 370 return null; 371 } 372 373 /** 374 * Gets the ubiquitous small molecules among the given set and in the given context. It is 375 * assumed that the given set is either left or right of a Conversion. If there is no 376 * non-ubiquitous element in the set, then the least ubique(s) are removed from the result. 377 * @param entities left or right of a conversion 378 * @param context are these entities input or output 379 * @return ubiquitous small molecules in the given context 380 */ 381 public Collection<SmallMolecule> getUbiques(Set<PhysicalEntity> entities, RelType context) 382 { 383 Map<String, SmallMolecule> ubiques = new HashMap<String, SmallMolecule>(); 384 boolean allUbiques = true; 385 386 for (PhysicalEntity pe : entities) 387 { 388 if (pe instanceof SmallMolecule) 389 { 390 EntityReference er = ((SmallMolecule) pe).getEntityReference(); 391 392 if (er != null && isUbique(er.getRDFId(), context)) 393 { 394 ubiques.put(er.getRDFId(), (SmallMolecule) pe); 395 } 396 else 397 { 398 allUbiques = false; 399 } 400 } 401 else allUbiques = false; 402 } 403 404 if (allUbiques && !ubiques.isEmpty()) 405 { 406 Set<String> least = getLeastUbique(ubiques.keySet()); 407 for (String id : least) 408 { 409 ubiques.remove(id); 410 } 411 } 412 413 return ubiques.values(); 414 } 415 416 /** 417 * Gets the non-ubiquitous physical entities in the given set and in the given context. It is 418 * assumed that the given set is either left or right of a Conversion. If there is no 419 * non-ubiquitous element in the set, then the least ubique(s) are added to the result. 420 * @param entities left or right of a conversion 421 * @param ctx are these entities input or output 422 * @return non-ubiquitous physical entities in the given context 423 */ 424 public Set<PhysicalEntity> getNonUbiques(Set<PhysicalEntity> entities, RelType ctx) 425 { 426 Collection<SmallMolecule> ubiques = getUbiques(entities, ctx); 427 if (ubiques.isEmpty()) return entities; 428 429 Set<PhysicalEntity> result = new HashSet<PhysicalEntity>(entities); 430 result.removeAll(ubiques); 431 return result; 432 } 433 434 public Set getNonUbiqueObjects(Set objects) 435 { 436 Set result = new HashSet(); 437 for (Object o : objects) 438 { 439 if (o instanceof SmallMolecule && !isUbique((SmallMolecule) o)) result.add(o); 440 } 441 return result; 442 } 443}