001package org.biopax.paxtools.pattern.example; 002 003import org.biopax.paxtools.io.SimpleIOHandler; 004import org.biopax.paxtools.model.BioPAXElement; 005import org.biopax.paxtools.model.Model; 006import org.biopax.paxtools.model.level3.*; 007import org.biopax.paxtools.pattern.Match; 008import org.biopax.paxtools.pattern.Pattern; 009import org.biopax.paxtools.pattern.PatternBox; 010import org.biopax.paxtools.pattern.Searcher; 011import org.biopax.paxtools.pattern.miner.AbstractSIFMiner; 012import org.biopax.paxtools.pattern.miner.SIFEnum; 013import org.biopax.paxtools.pattern.util.Blacklist; 014 015import java.io.*; 016import java.util.*; 017 018/** 019 * This class goes over the state change pattern results and writes down the gained and lost 020 * modifications through these directed relations. 021 * 022 * Do not forget to allocate a large memory while running this example (like -Xmx8G). 023 * 024 * @author Ozgun Babur 025 */ 026public class DeltaFeatureExtractor 027{ 028 private Map<String, Map<String, Set<String>>> gainMods; 029 private Map<String, Map<String, Set<String>>> lossMods; 030 private Map<String, Map<String, Set<String>>> gainComps; 031 private Map<String, Map<String, Set<String>>> lossComps; 032 private Map<String, Map<String, Set<String>>> sourceMods; 033 private Map<String, Map<String, Set<String>>> sourceComps; 034 private Map<String, Map<String, Set<String>>> mediators; 035 036 private AbstractMiner[] miners = new AbstractMiner[]{ 037 new CSCO(), new CSCO_ButPart(), new CSCO_CtrlAndPart(), new CSCO_ThrContSmMol()}; 038 039 public static void main(String[] args) throws IOException 040 { 041 // A blacklist file is available at http://www.pathwaycommons.org/pc2/downloads/blacklist.txt 042 // This is for avoiding ubiquitous small molecules like ATP 043 Blacklist black = new Blacklist("blacklist.txt"); 044 045 DeltaFeatureExtractor dfe = new DeltaFeatureExtractor(); 046 dfe.setBlacklist(black); 047 048 SimpleIOHandler io = new SimpleIOHandler(); 049 050 // The large model file is available at 051 // http://www.pathwaycommons.org/pc2/downloads/Pathway%20Commons.5.Detailed_Process_Data.BIOPAX.owl.gz 052 Model model = io.convertFromOWL(new FileInputStream( 053 "Pathway Commons.5.Detailed_Process_Data.BIOPAX.owl")); 054 055 dfe.mineAndCollect(model); 056 dfe.writeResults("DeltaFeatures.txt"); 057 } 058 059 abstract class AbstractMiner extends AbstractSIFMiner 060 { 061 public AbstractMiner() 062 { 063 super(SIFEnum.CONTROLS_STATE_CHANGE_OF); 064 } 065 066 @Override 067 public abstract Pattern constructPattern(); 068 069 @Override 070 public void writeResult(Map<BioPAXElement, List<Match>> matches, OutputStream out) throws IOException 071 { 072 for (List<Match> matchList : matches.values()) 073 { 074 for (Match m : matchList) 075 { 076 // find source and target identifiers 077 Set<String> s1 = getIdentifiers(m, getSourceLabel()); 078 Set<String> s2 = getIdentifiers(m, getTargetLabel()); 079 080 if (s1.isEmpty() || s2.isEmpty()) continue; 081 082 // collect gained and lost modifications and cellular locations of the target 083 084 Set<String>[] modif = getDeltaModifications(m, 085 getInputSimplePELabel(), getInputComplexPELabel(), 086 getOutputSimplePELabel(), getOutputComplexPELabel()); 087 088 Set<String>[] comps = getDeltaCompartments(m, 089 getInputSimplePELabel(), getInputComplexPELabel(), 090 getOutputSimplePELabel(), getOutputComplexPELabel()); 091 092 // correct for inactive-labelled controllers and negative sign controls 093 int sign = sign(m, getControlLabels()); 094 if (labeledInactive(m, getSourceSimplePELabel(), getSourceComplexPELabel())) 095 sign *= -1; 096 097 Set<String> modif0 = modif[sign == -1 ? 1 : 0]; 098 Set<String> modif1 = modif[sign == -1 ? 0 : 1]; 099 Set<String> comps0 = comps[sign == -1 ? 1 : 0]; 100 Set<String> comps1 = comps[sign == -1 ? 0 : 1]; 101 102 for (String s1s : s1) 103 { 104 for (String s2s : s2) 105 { 106 if (!modif0.isEmpty()) collect(s1s, s2s, modif0, gainMods); 107 if (!modif1.isEmpty()) collect(s1s, s2s, modif1, lossMods); 108 if (!comps0.isEmpty()) collect(s1s, s2s, comps0, gainComps); 109 if (!comps1.isEmpty()) collect(s1s, s2s, comps1, lossComps); 110 111 if (!modif[0].isEmpty() || !modif[1].isEmpty() || 112 !comps[0].isEmpty() || !comps[1].isEmpty()) 113 { 114 // record mediator ids to map these interactions to detailed data 115 116 if (!mediators.containsKey(s1s)) mediators.put(s1s, new HashMap<String, Set<String>>()); 117 if (!mediators.get(s1s).containsKey(s2s)) mediators.get(s1s).put(s2s, new HashSet<String>()); 118 119 List<BioPAXElement> meds = m.get(getMediatorLabels(), getPattern()); 120 for (BioPAXElement med : meds) 121 { 122 mediators.get(s1s).get(s2s).add(med.getRDFId()); 123 } 124 125 // record modifications and cellular locations of the source molecule 126 127 Set<String> mods = getModifications(m, getSourceSimplePELabel(), getSourceComplexPELabel()); 128 Set<String> locs = getCellularLocations(m, getSourceSimplePELabel(), getSourceComplexPELabel()); 129 130 collect(s1s, s2s, mods, sourceMods); 131 collect(s1s, s2s, locs, sourceComps); 132 } 133 } 134 } 135 } 136 } 137 138 correctForActiveAndInactive(gainMods, lossMods, sourceMods); 139 } 140 141 private void collect(String s1, String s2, Set<String> modificationFeatures, 142 Map<String, Map<String, Set<String>>> map) 143 { 144 if (!map.containsKey(s1)) map.put(s1, new HashMap<String, Set<String>>()); 145 if (!map.get(s1).containsKey(s2)) map.get(s1).put(s2, new HashSet<String>()); 146 map.get(s1).get(s2).addAll(modificationFeatures); 147 } 148 149 150 String getSourceSimplePELabel() 151 { 152 return "controller simple PE"; 153 } 154 155 String getSourceComplexPELabel() 156 { 157 return "controller PE"; 158 } 159 160 String getInputSimplePELabel() 161 { 162 return "input simple PE"; 163 } 164 165 String getOutputSimplePELabel() 166 { 167 return "output simple PE"; 168 } 169 170 String getInputComplexPELabel() 171 { 172 return "input PE"; 173 } 174 175 String getOutputComplexPELabel() 176 { 177 return "output PE"; 178 } 179 180 @Override 181 public String getSourceLabel() 182 { 183 return "controller ER"; 184 } 185 186 @Override 187 public String getTargetLabel() 188 { 189 return "changed ER"; 190 } 191 192 @Override 193 public String[] getMediatorLabels() 194 { 195 return new String[]{"Control", "Conversion"}; 196 } 197 198 public String[] getControlLabels() 199 { 200 return new String[]{"Control"}; 201 } 202 203 protected String toString(ModificationFeature mf) 204 { 205 String term = getModificationTerm(mf); 206 if (term != null) 207 { 208 String loc = getPositionInString(mf); 209 return term + loc; 210 } 211 return null; 212 } 213 } 214 215 class CSCO extends AbstractMiner 216 { 217 @Override 218 public Pattern constructPattern() 219 { 220 return PatternBox.controlsStateChange(); 221 } 222 } 223 224 class CSCO_CtrlAndPart extends AbstractMiner 225 { 226 @Override 227 public Pattern constructPattern() 228 { 229 return PatternBox.controlsStateChangeBothControlAndPart(); 230 } 231 } 232 233 class CSCO_ButPart extends AbstractMiner 234 { 235 @Override 236 public Pattern constructPattern() 237 { 238 return PatternBox.controlsStateChangeButIsParticipant(); 239 } 240 241 @Override 242 public String[] getControlLabels() 243 { 244 return new String[]{}; 245 } 246 247 @Override 248 public String[] getMediatorLabels() 249 { 250 return new String[]{"Conversion"}; 251 } 252 } 253 254 class CSCO_ThrContSmMol extends AbstractMiner 255 { 256 @Override 257 public Pattern constructPattern() 258 { 259 return PatternBox.controlsStateChangeThroughControllerSmallMolecule(blacklist); 260 } 261 262 @Override 263 String getSourceSimplePELabel() 264 { 265 return "upper controller simple PE"; 266 } 267 268 @Override 269 String getSourceComplexPELabel() 270 { 271 return "upper controller PE"; 272 } 273 274 @Override 275 public String getSourceLabel() 276 { 277 return "upper controller ER"; 278 } 279 280 @Override 281 public String[] getMediatorLabels() 282 { 283 return new String[]{"upper Control", "upper Conversion", "Control", "Conversion"}; 284 } 285 286 @Override 287 public String[] getControlLabels() 288 { 289 return new String[]{"upper Control", "Control"}; 290 } 291 } 292 293 public DeltaFeatureExtractor() 294 { 295 gainMods = new HashMap<String, Map<String, Set<String>>>(); 296 lossMods = new HashMap<String, Map<String, Set<String>>>(); 297 gainComps = new HashMap<String, Map<String, Set<String>>>(); 298 lossComps = new HashMap<String, Map<String, Set<String>>>(); 299 sourceMods = new HashMap<String, Map<String, Set<String>>>(); 300 sourceComps = new HashMap<String, Map<String, Set<String>>>(); 301 mediators = new HashMap<String, Map<String, Set<String>>>(); 302 } 303 304 public void setBlacklist(Blacklist blacklist) 305 { 306 for (AbstractMiner miner : miners) 307 { 308 miner.setBlacklist(blacklist); 309 } 310 } 311 312 public void mineAndCollect(Model model) 313 { 314 for (AbstractMiner miner : miners) 315 { 316 Map<BioPAXElement, List<Match>> matches = Searcher.search(model, miner.getPattern()); 317 318 try { miner.writeResult(matches, null); 319 } catch (IOException e){e.printStackTrace();} 320 } 321 } 322 323 public void writeResults(String filename) throws IOException 324 { 325 BufferedWriter writer = new BufferedWriter(new FileWriter(filename)); 326 writer.write("Source\tType\tTarget\tSource-modifs\tSource-locs\tGained-modifs\tLost-modifs\tGained-locs\tLost-locs\tMediators"); 327 328 Set<String> s1s = new HashSet<String>(gainMods.keySet()); 329 s1s.addAll(lossMods.keySet()); 330 331 for (String s1 : s1s) 332 { 333 Set<String> s2s = new HashSet<String>(); 334 if (gainMods.containsKey(s1)) s2s.addAll(gainMods.get(s1).keySet()); 335 if (lossMods.containsKey(s1)) s2s.addAll(lossMods.get(s1).keySet()); 336 337 for (String s2 : s2s) 338 { 339 writer.write("\n" + s1 + "\t" + SIFEnum.CONTROLS_STATE_CHANGE_OF.getTag() + 340 "\t" + s2); 341 342 writeVal(writer, s1, s2, sourceMods); 343 writeVal(writer, s1, s2, sourceComps); 344 writeVal(writer, s1, s2, gainMods); 345 writeVal(writer, s1, s2, lossMods); 346 writeVal(writer, s1, s2, gainComps); 347 writeVal(writer, s1, s2, lossComps); 348 writer.write("\t" + toString(mediators.get(s1).get(s2))); 349 } 350 } 351 352 writer.close(); 353 } 354 355 private static final String ACTIVE_WORD = "residue modification, active"; 356 private static final String INACTIVE_WORD = "residue modification, inactive"; 357 private void correctForActiveAndInactive(Map<String, Map<String, Set<String>>>... maps) 358 { 359 for (Map<String, Map<String, Set<String>>> map : maps) 360 { 361 for (Map<String, Set<String>> setMaps : map.values()) 362 { 363 for (Set<String> set : setMaps.values()) 364 { 365 if (set.contains(ACTIVE_WORD)) 366 { 367 set.remove(ACTIVE_WORD); 368 set.add("active"); 369 } 370 if (set.contains(INACTIVE_WORD)) 371 { 372 set.remove(INACTIVE_WORD); 373 set.add("inactive"); 374 } 375 } 376 } 377 } 378 } 379 380 private void writeVal(BufferedWriter writer, String s1, String s2, 381 Map<String, Map<String, Set<String>>> map) throws IOException 382 { 383 writer.write("\t"); 384 if (map.containsKey(s1) && map.get(s1).containsKey(s2)) 385 { 386 writer.write(map.get(s1).get(s2).toString()); 387 } 388 } 389 390 private String toString(Set<String> set) 391 { 392 String s = ""; 393 for (String s1 : set) 394 { 395 s += " " + s1; 396 } 397 return s.substring(1); 398 } 399}