001package org.biopax.paxtools.examples; 002 003import org.biopax.paxtools.controller.PropertyEditor; 004import org.biopax.paxtools.controller.SimpleEditorMap; 005import org.biopax.paxtools.controller.Traverser; 006import org.biopax.paxtools.controller.Visitor; 007import org.biopax.paxtools.model.BioPAXElement; 008import org.biopax.paxtools.model.BioPAXLevel; 009import org.biopax.paxtools.model.Model; 010import org.biopax.paxtools.model.level2.*; 011 012import java.io.FileNotFoundException; 013import java.io.PrintWriter; 014import java.util.*; 015 016/** 017 * Prints all the genes (aka proteins) in the L2 pathway 018 * and sub-pathways (*conditions apply), and also - trace 019 * where they come from. 020 * 021 * * Note: it doesn't traverse the 'NEXT-STEP' property! 022 * (as doing so may take you beyond the pathway of interest) 023 * 024 * @author rodch 025 */ 026public class PathwayGenesExtractor implements Visitor { 027 028 static final String OUT = "geneset.txt"; 029 030 pathway pw; 031 Map<String,Set<String>> geneset; 032 Traverser traverser; 033 Collection<BioPAXElement> visited; 034 String path = ""; 035 Collection<pathway> subpathways; 036 Collection<interaction> interactions; 037 038 public PathwayGenesExtractor(pathway pw) { 039 traverser = new Traverser(SimpleEditorMap.get(BioPAXLevel.L2), this); 040 geneset = new HashMap<String, Set<String>>(); 041 subpathways = new HashSet<pathway>(); 042 interactions = new HashSet<interaction>(); 043 visited = new HashSet<BioPAXElement>(); 044 this.pw = pw; 045 } 046 047 void run() { 048 traverser.traverse(pw, null); 049 } 050 051 public static void main(String[] args) throws FileNotFoundException { 052 053 if(args.length != 2) { 054 System.out.println("\nUse Parameters: " + 055 "biopaxFile pathwayFullRdfId\n"); 056 System.exit(-1); 057 } 058 059 Model model = Macros.open(args[0]); 060 String pwId = args[1]; // gets pathway ID 061 pathway pw = (pathway) model.getByID(pwId); 062 063 // extract proteins 064 PathwayGenesExtractor extractor = new PathwayGenesExtractor(pw); 065 extractor.run(); 066 067 PrintWriter out = new PrintWriter(OUT); 068 out.println("rdf:IDs of proteins in the pathway : " + pw.getNAME() + " and its sub-pathways."); 069 Set<String> glist = new HashSet<String>(); // to keep all IDs 070 071 for(String key : extractor.geneset.keySet()) { 072 glist.addAll(extractor.geneset.get(key)); 073 StringBuffer sb = new StringBuffer(key); 074 for(String name : extractor.geneset.get(key)) { 075 sb.append(", ").append(name); 076 } 077 out.println(sb.toString()); 078 } 079 080 out.println("\nALL IDs:"); 081 for(String g : glist) { 082 out.println(g); 083 } 084 085 out.println("\nSub-pathways (rdfId : NAME):"); 086 for(pathway w : extractor.subpathways) { 087 out.println(getLocalId(w) + " : " + w.getNAME()); 088 } 089 090 out.println("\nInteractions:"); 091 for(interaction it : extractor.interactions) { 092 out.println(getLocalId(it) + " : " + it.getNAME()); 093 } 094 095 out.close(); 096 } 097 098 public void visit(BioPAXElement domain1, Object range, Model model, PropertyEditor editor) { 099 100 // do not traverse the NEXT-STEP 101 if(editor.getProperty().equals("NEXT-STEP")) { 102 return; 103 } 104 105 if (range != null && range instanceof BioPAXElement && !visited.contains(range)) 106 { 107 BioPAXElement bpe = (BioPAXElement) range; 108 path += getIdent(bpe); 109 System.out.print(path + editor.getProperty() + "=" 110 + getLocalId(bpe) + " " + bpe.getModelInterface().getSimpleName()); 111 if(bpe instanceof entity && ((entity) bpe).getNAME() != null) { 112 System.out.print(" {" 113 + ((entity) bpe).getNAME() 114 .replace("(name copied from entity in Homo sapiens)", "(name from human)") 115 + "}"); 116 } 117 118 if(bpe instanceof pathway) { 119 subpathways.add((pathway) bpe); 120 } else if(bpe instanceof interaction) { 121 interactions.add((interaction) bpe); 122 } 123 124 if (bpe instanceof protein) { 125 protein p = (protein) bpe; 126 String id = getLocalId(p); 127 128 Set<String> refs = new HashSet<String>(); 129 for (xref x : p.getXREF()) { 130 if (x instanceof unificationXref || x instanceof relationshipXref) { 131 refs.add(x.getID()); 132 } 133 } 134 135 System.out.print(" (" + refs.size() + " ADDED!)"); 136 137 if (geneset.containsKey(id)) { 138 geneset.get(id).addAll(refs); 139 } else { 140 geneset.put(id, refs); 141 } 142 143 } 144 System.out.println(); 145 146 visited.add(bpe); 147 148 // go deeper 149 traverser.traverse(bpe, model); 150 151 path = path.substring(0, path.length()-4); 152 } 153 } 154 155 // get remarks 156 private String getIdent(BioPAXElement bpe) { 157 String ident = "----"; 158 159 if(bpe instanceof pathway) { 160 ident = "-pw-"; 161 } else if(bpe instanceof interaction) { 162 ident = "-in-"; 163 } else if (bpe instanceof protein) { 164 ident = "-pr-"; 165 } else if (bpe instanceof complex) { 166 ident = "-co-"; 167 } 168 return ident; 169 } 170 171 // Gets the local part of the RDF ID (- beyond the last '#') 172 static String getLocalId(BioPAXElement bpe) { 173 String id = bpe.getRDFId(); 174 return id.replaceFirst("^.+?#", ""); 175 } 176 177}