001package org.biopax.paxtools.pattern.example;
002
003import org.biopax.paxtools.io.SimpleIOHandler;
004import org.biopax.paxtools.model.BioPAXElement;
005import org.biopax.paxtools.model.Model;
006import org.biopax.paxtools.model.level3.*;
007import org.biopax.paxtools.pattern.Match;
008import org.biopax.paxtools.pattern.Pattern;
009import org.biopax.paxtools.pattern.PatternBox;
010import org.biopax.paxtools.pattern.Searcher;
011import org.biopax.paxtools.pattern.miner.AbstractSIFMiner;
012import org.biopax.paxtools.pattern.miner.SIFEnum;
013import org.biopax.paxtools.pattern.util.Blacklist;
014
015import java.io.*;
016import java.util.*;
017
/**
 * This class goes over the results of the state-change patterns and records the modifications
 * that are gained and lost through these directed relations.
 *
 * Remember to allocate a large amount of memory when running this example (e.g. -Xmx8G).
 *
 * @author Ozgun Babur
 */
026public class DeltaFeatureExtractor
027{
028        private Map<String, Map<String, Set<String>>> gainMods;
029        private Map<String, Map<String, Set<String>>> lossMods;
030        private Map<String, Map<String, Set<String>>> gainComps;
031        private Map<String, Map<String, Set<String>>> lossComps;
032        private Map<String, Map<String, Set<String>>> sourceMods;
033        private Map<String, Map<String, Set<String>>> sourceComps;
034        private Map<String, Map<String, Set<String>>> mediators;
035
036        private AbstractMiner[] miners = new AbstractMiner[]{
037                new CSCO(), new CSCO_ButPart(), new CSCO_CtrlAndPart(), new CSCO_ThrContSmMol()};
038
039        public static void main(String[] args) throws IOException
040        {
041                // A blacklist file is available at http://www.pathwaycommons.org/pc2/downloads/blacklist.txt
042                // This is for avoiding ubiquitous small molecules like ATP
043                Blacklist black = new Blacklist("blacklist.txt");
044
045                DeltaFeatureExtractor dfe = new DeltaFeatureExtractor();
046                dfe.setBlacklist(black);
047
048                SimpleIOHandler io = new SimpleIOHandler();
049
050                // The large model file is available at
051                // http://www.pathwaycommons.org/pc2/downloads/Pathway%20Commons.5.Detailed_Process_Data.BIOPAX.owl.gz
052                Model model = io.convertFromOWL(new FileInputStream(
053                        "Pathway Commons.5.Detailed_Process_Data.BIOPAX.owl"));
054
055                dfe.mineAndCollect(model);
056                dfe.writeResults("DeltaFeatures.txt");
057        }
058
059        abstract class AbstractMiner extends AbstractSIFMiner
060        {
061                public AbstractMiner()
062                {
063                        super(SIFEnum.CONTROLS_STATE_CHANGE_OF);
064                }
065
066                @Override
067                public abstract Pattern constructPattern();
068
069                @Override
070                public void writeResult(Map<BioPAXElement, List<Match>> matches, OutputStream out) throws IOException
071                {
072                        for (List<Match> matchList : matches.values())
073                        {
074                                for (Match m : matchList)
075                                {
076                                        // find source and target identifiers
077                                        Set<String> s1 = getIdentifiers(m, getSourceLabel());
078                                        Set<String> s2 = getIdentifiers(m, getTargetLabel());
079
080                                        if (s1.isEmpty() || s2.isEmpty()) continue;
081
082                                        // collect gained and lost modifications and cellular locations of the target
083
084                                        Set<String>[] modif = getDeltaModifications(m,
085                                                getInputSimplePELabel(), getInputComplexPELabel(),
086                                                getOutputSimplePELabel(), getOutputComplexPELabel());
087
088                                        Set<String>[] comps = getDeltaCompartments(m,
089                                                getInputSimplePELabel(), getInputComplexPELabel(),
090                                                getOutputSimplePELabel(), getOutputComplexPELabel());
091
092                                        // correct for inactive-labelled controllers and negative sign controls
093                                        int sign = sign(m, getControlLabels());
094                                        if (labeledInactive(m, getSourceSimplePELabel(), getSourceComplexPELabel()))
095                                                sign *= -1;
096
097                                        Set<String> modif0 = modif[sign == -1 ? 1 : 0];
098                                        Set<String> modif1 = modif[sign == -1 ? 0 : 1];
099                                        Set<String> comps0 = comps[sign == -1 ? 1 : 0];
100                                        Set<String> comps1 = comps[sign == -1 ? 0 : 1];
101
102                                        for (String s1s : s1)
103                                        {
104                                                for (String s2s : s2)
105                                                {
106                                                        if (!modif0.isEmpty()) collect(s1s, s2s, modif0, gainMods);
107                                                        if (!modif1.isEmpty()) collect(s1s, s2s, modif1, lossMods);
108                                                        if (!comps0.isEmpty()) collect(s1s, s2s, comps0, gainComps);
109                                                        if (!comps1.isEmpty()) collect(s1s, s2s, comps1, lossComps);
110
111                                                        if (!modif[0].isEmpty() || !modif[1].isEmpty() ||
112                                                                !comps[0].isEmpty() || !comps[1].isEmpty())
113                                                        {
114                                                                // record mediator ids to map these interactions to detailed data
115
116                                                                if (!mediators.containsKey(s1s)) mediators.put(s1s, new HashMap<String, Set<String>>());
117                                                                if (!mediators.get(s1s).containsKey(s2s)) mediators.get(s1s).put(s2s, new HashSet<String>());
118
119                                                                List<BioPAXElement> meds = m.get(getMediatorLabels(), getPattern());
120                                                                for (BioPAXElement med : meds)
121                                                                {
122                                                                        mediators.get(s1s).get(s2s).add(med.getRDFId());
123                                                                }
124
125                                                                // record modifications and cellular locations of the source molecule
126
127                                                                Set<String> mods = getModifications(m, getSourceSimplePELabel(), getSourceComplexPELabel());
128                                                                Set<String> locs = getCellularLocations(m, getSourceSimplePELabel(), getSourceComplexPELabel());
129
130                                                                collect(s1s, s2s, mods, sourceMods);
131                                                                collect(s1s, s2s, locs, sourceComps);
132                                                        }
133                                                }
134                                        }
135                                }
136                        }
137
138                        correctForActiveAndInactive(gainMods, lossMods, sourceMods);
139                }
140
141                private void collect(String s1, String s2, Set<String> modificationFeatures,
142                        Map<String, Map<String, Set<String>>> map)
143                {
144                        if (!map.containsKey(s1)) map.put(s1, new HashMap<String, Set<String>>());
145                        if (!map.get(s1).containsKey(s2)) map.get(s1).put(s2, new HashSet<String>());
146                        map.get(s1).get(s2).addAll(modificationFeatures);
147                }
148
149
150                String getSourceSimplePELabel()
151                {
152                        return "controller simple PE";
153                }
154
155                String getSourceComplexPELabel()
156                {
157                        return "controller PE";
158                }
159
160                String getInputSimplePELabel()
161                {
162                        return "input simple PE";
163                }
164
165                String getOutputSimplePELabel()
166                {
167                        return "output simple PE";
168                }
169
170                String getInputComplexPELabel()
171                {
172                        return "input PE";
173                }
174
175                String getOutputComplexPELabel()
176                {
177                        return "output PE";
178                }
179
180                @Override
181                public String getSourceLabel()
182                {
183                        return "controller ER";
184                }
185
186                @Override
187                public String getTargetLabel()
188                {
189                        return "changed ER";
190                }
191
192                @Override
193                public String[] getMediatorLabels()
194                {
195                        return new String[]{"Control", "Conversion"};
196                }
197
198                public String[] getControlLabels()
199                {
200                        return new String[]{"Control"};
201                }
202
203                protected String toString(ModificationFeature mf)
204                {
205                        String term = getModificationTerm(mf);
206                        if (term != null)
207                        {
208                                String loc = getPositionInString(mf);
209                                return term + loc;
210                        }
211                        return null;
212                }
213        }
214
215        class CSCO extends AbstractMiner
216        {
217                @Override
218                public Pattern constructPattern()
219                {
220                        return PatternBox.controlsStateChange();
221                }
222        }
223
224        class CSCO_CtrlAndPart extends AbstractMiner
225        {
226                @Override
227                public Pattern constructPattern()
228                {
229                        return PatternBox.controlsStateChangeBothControlAndPart();
230                }
231        }
232
233        class CSCO_ButPart extends AbstractMiner
234        {
235                @Override
236                public Pattern constructPattern()
237                {
238                        return PatternBox.controlsStateChangeButIsParticipant();
239                }
240
241                @Override
242                public String[] getControlLabels()
243                {
244                        return new String[]{};
245                }
246
247                @Override
248                public String[] getMediatorLabels()
249                {
250                        return new String[]{"Conversion"};
251                }
252        }
253
254        class CSCO_ThrContSmMol extends AbstractMiner
255        {
256                @Override
257                public Pattern constructPattern()
258                {
259                        return PatternBox.controlsStateChangeThroughControllerSmallMolecule(blacklist);
260                }
261
262                @Override
263                String getSourceSimplePELabel()
264                {
265                        return "upper controller simple PE";
266                }
267
268                @Override
269                String getSourceComplexPELabel()
270                {
271                        return "upper controller PE";
272                }
273
274                @Override
275                public String getSourceLabel()
276                {
277                        return "upper controller ER";
278                }
279
280                @Override
281                public String[] getMediatorLabels()
282                {
283                        return new String[]{"upper Control", "upper Conversion", "Control", "Conversion"};
284                }
285
286                @Override
287                public String[] getControlLabels()
288                {
289                        return new String[]{"upper Control", "Control"};
290                }
291        }
292
293        public DeltaFeatureExtractor()
294        {
295                gainMods = new HashMap<String, Map<String, Set<String>>>();
296                lossMods = new HashMap<String, Map<String, Set<String>>>();
297                gainComps = new HashMap<String, Map<String, Set<String>>>();
298                lossComps = new HashMap<String, Map<String, Set<String>>>();
299                sourceMods = new HashMap<String, Map<String, Set<String>>>();
300                sourceComps = new HashMap<String, Map<String, Set<String>>>();
301                mediators = new HashMap<String, Map<String, Set<String>>>();
302        }
303
304        public void setBlacklist(Blacklist blacklist)
305        {
306                for (AbstractMiner miner : miners)
307                {
308                        miner.setBlacklist(blacklist);
309                }
310        }
311
312        public void mineAndCollect(Model model)
313        {
314                for (AbstractMiner miner : miners)
315                {
316                        Map<BioPAXElement, List<Match>> matches = Searcher.search(model, miner.getPattern());
317
318                        try { miner.writeResult(matches, null);
319                        } catch (IOException e){e.printStackTrace();}
320                }
321        }
322
323        public void writeResults(String filename) throws IOException
324        {
325                BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
326                writer.write("Source\tType\tTarget\tSource-modifs\tSource-locs\tGained-modifs\tLost-modifs\tGained-locs\tLost-locs\tMediators");
327
328                Set<String> s1s = new HashSet<String>(gainMods.keySet());
329                s1s.addAll(lossMods.keySet());
330
331                for (String s1 : s1s)
332                {
333                        Set<String> s2s = new HashSet<String>();
334                        if (gainMods.containsKey(s1)) s2s.addAll(gainMods.get(s1).keySet());
335                        if (lossMods.containsKey(s1)) s2s.addAll(lossMods.get(s1).keySet());
336
337                        for (String s2 : s2s)
338                        {
339                                writer.write("\n" + s1 + "\t" + SIFEnum.CONTROLS_STATE_CHANGE_OF.getTag() +
340                                        "\t" + s2);
341
342                                writeVal(writer, s1, s2, sourceMods);
343                                writeVal(writer, s1, s2, sourceComps);
344                                writeVal(writer, s1, s2, gainMods);
345                                writeVal(writer, s1, s2, lossMods);
346                                writeVal(writer, s1, s2, gainComps);
347                                writeVal(writer, s1, s2, lossComps);
348                                writer.write("\t" + toString(mediators.get(s1).get(s2)));
349                        }
350                }
351
352                writer.close();
353        }
354
355        private static final String ACTIVE_WORD = "residue modification, active";
356        private static final String INACTIVE_WORD = "residue modification, inactive";
357        private void correctForActiveAndInactive(Map<String, Map<String, Set<String>>>... maps)
358        {
359                for (Map<String, Map<String, Set<String>>> map : maps)
360                {
361                        for (Map<String, Set<String>> setMaps : map.values())
362                        {
363                                for (Set<String> set : setMaps.values())
364                                {
365                                        if (set.contains(ACTIVE_WORD))
366                                        {
367                                                set.remove(ACTIVE_WORD);
368                                                set.add("active");
369                                        }
370                                        if (set.contains(INACTIVE_WORD))
371                                        {
372                                                set.remove(INACTIVE_WORD);
373                                                set.add("inactive");
374                                        }
375                                }
376                        }
377                }
378        }
379
380        private void writeVal(BufferedWriter writer, String s1, String s2,
381                Map<String, Map<String, Set<String>>> map) throws IOException
382        {
383                writer.write("\t");
384                if (map.containsKey(s1) && map.get(s1).containsKey(s2))
385                {
386                        writer.write(map.get(s1).get(s2).toString());
387                }
388        }
389
390        private String toString(Set<String> set)
391        {
392                String s = "";
393                for (String s1 : set)
394                {
395                        s += " " + s1;
396                }
397                return s.substring(1);
398        }
399}