001package org.biopax.paxtools.examples;
002
003import org.biopax.paxtools.controller.PropertyEditor;
004import org.biopax.paxtools.controller.SimpleEditorMap;
005import org.biopax.paxtools.controller.Traverser;
006import org.biopax.paxtools.controller.Visitor;
007import org.biopax.paxtools.model.BioPAXElement;
008import org.biopax.paxtools.model.BioPAXLevel;
009import org.biopax.paxtools.model.Model;
010import org.biopax.paxtools.model.level2.*;
011
012import java.io.FileNotFoundException;
013import java.io.PrintWriter;
014import java.util.*;
015
016/**
017 * Prints all the genes (aka proteins) in the L2 pathway 
018 * and sub-pathways (*conditions apply), and also - trace 
019 * where they come from.
020 * 
021 * * Note: it doesn't traverse the 'NEXT-STEP' property!
022 * (as doing so may take you beyond the pathway of interest)
023 * 
024 * @author rodch
025 */
026public class PathwayGenesExtractor implements Visitor {
027
028        static final String OUT = "geneset.txt";        
029        
030        pathway pw;
031        Map<String,Set<String>> geneset;
032        Traverser traverser;
033        Collection<BioPAXElement> visited;
034        String path = "";
035        Collection<pathway> subpathways;
036        Collection<interaction> interactions;
037        
038        public PathwayGenesExtractor(pathway pw) {
039                traverser = new Traverser(SimpleEditorMap.get(BioPAXLevel.L2), this);
040                geneset = new HashMap<String, Set<String>>();
041                subpathways = new HashSet<pathway>();
042                interactions = new HashSet<interaction>();
043                visited = new HashSet<BioPAXElement>();
044                this.pw = pw;
045        }
046        
047        void run() {
048                traverser.traverse(pw, null);
049        }
050        
051        public static void main(String[] args) throws FileNotFoundException {
052
053                if(args.length != 2) {
054                        System.out.println("\nUse Parameters: " +
055                                        "biopaxFile pathwayFullRdfId\n");
056                        System.exit(-1);
057                }
058
059                Model model = Macros.open(args[0]);
060                String pwId = args[1]; // gets pathway ID
061                pathway pw = (pathway) model.getByID(pwId);
062                
063                // extract proteins
064                PathwayGenesExtractor extractor = new PathwayGenesExtractor(pw);
065                extractor.run();
066                
067                PrintWriter out = new PrintWriter(OUT);
068                out.println("rdf:IDs of proteins in the pathway : " + pw.getNAME() + " and its sub-pathways.");
069                Set<String> glist = new HashSet<String>(); // to keep all IDs
070                        
071                for(String key : extractor.geneset.keySet()) {
072                        glist.addAll(extractor.geneset.get(key));
073                        StringBuffer sb = new StringBuffer(key);
074                        for(String name : extractor.geneset.get(key)) {
075                                sb.append(", ").append(name);
076                        }
077                        out.println(sb.toString());
078                }
079                
080                out.println("\nALL IDs:");
081                for(String g : glist) {
082                        out.println(g);
083                }
084                
085                out.println("\nSub-pathways (rdfId : NAME):");
086                for(pathway w : extractor.subpathways) {
087                        out.println(getLocalId(w) + " : " + w.getNAME());
088                }               
089
090                out.println("\nInteractions:");
091                for(interaction it : extractor.interactions) {
092                        out.println(getLocalId(it) + " : " + it.getNAME());
093                }
094                        
095                out.close();
096        }
097
098        public void visit(BioPAXElement domain1, Object range, Model model, PropertyEditor editor) {
099                
100                // do not traverse the NEXT-STEP
101                if(editor.getProperty().equals("NEXT-STEP")) {
102                        return;
103                }
104                
105                if (range != null && range instanceof  BioPAXElement && !visited.contains(range))
106                {
107                        BioPAXElement bpe = (BioPAXElement) range;
108                        path += getIdent(bpe);
109                        System.out.print(path + editor.getProperty() + "=" 
110                                        + getLocalId(bpe) + " " + bpe.getModelInterface().getSimpleName());
111                        if(bpe instanceof entity && ((entity) bpe).getNAME() != null) {
112                                System.out.print(" {"
113                                                + ((entity) bpe).getNAME()
114                                                .replace("(name copied from entity in Homo sapiens)", "(name from human)")
115                                                + "}");
116                        }
117
118                        if(bpe instanceof pathway) {
119                                subpathways.add((pathway) bpe);
120                        } else if(bpe instanceof interaction) {
121                                interactions.add((interaction) bpe);
122                        }
123                        
124                        if (bpe instanceof protein) {
125                                protein p = (protein) bpe;
126                                String id = getLocalId(p);
127                                
128                                Set<String> refs = new HashSet<String>();
129                                for (xref x : p.getXREF()) {
130                                        if (x instanceof unificationXref || x instanceof relationshipXref) {
131                                                refs.add(x.getID());
132                                        }
133                                }
134
135                                System.out.print(" (" + refs.size() + " ADDED!)");
136                                
137                                if (geneset.containsKey(id)) {
138                                        geneset.get(id).addAll(refs);
139                                } else {
140                                        geneset.put(id, refs);
141                                }
142
143                        }
144                        System.out.println();
145                        
146                        visited.add(bpe);
147
148                        // go deeper
149                        traverser.traverse(bpe, model);
150                        
151                        path = path.substring(0, path.length()-4);      
152                }
153        }
154        
155        // get remarks
156        private String getIdent(BioPAXElement bpe) {
157                String ident = "----";
158
159                if(bpe instanceof pathway) {
160                        ident = "-pw-";
161                } else if(bpe instanceof interaction) {
162                        ident = "-in-";
163                } else if (bpe instanceof protein) {
164                        ident = "-pr-";
165                } else if (bpe instanceof complex) {
166                        ident = "-co-";
167                } 
168                return ident;
169        }
170        
171        // Gets the local part of the RDF ID (- beyond the last '#')
172        static String getLocalId(BioPAXElement bpe) {
173                String id = bpe.getRDFId();
174                return id.replaceFirst("^.+?#", "");
175        }
176
177}