001package org.biopax.paxtools.pattern.miner;
002
003import org.biopax.paxtools.model.BioPAXElement;
004import org.biopax.paxtools.model.Model;
005import org.biopax.paxtools.model.level3.SmallMoleculeReference;
006import org.biopax.paxtools.pattern.util.Blacklist;
007import org.biopax.paxtools.pattern.util.ChemicalNameNormalizer;
008import org.biopax.paxtools.pattern.util.RelType;
009
010import java.util.*;
011
012/**
013 * This class generates a blacklist for the given model. It is important that the given model is the
014 * very big integrated corpus. It won't work on tiny little model.
015 *
016 * @author Ozgun Babur
017 */
018public class BlacklistGenerator
019{
020        /**
021         * For deciding if the molecule is ubiquitous and for determining the score and context.
022         */
023        private Decider decider;
024
025        /**
026         * Constructor with decider. This decider should be optimized for the specific resource that the
027         * user deals with.
028         * @param decider decides if the molecule is ubique
029         */
030        public BlacklistGenerator(Decider decider)
031        {
032                this.decider = decider;
033        }
034
035        /**
036         * Default constructor.
037         */
038        public BlacklistGenerator()
039        {
040                this(new Decider()
041                {
042                        @Override
043                        public boolean isUbique(int neighborSize, int upstrOnly, int dwstrOnly)
044                        {
045                                return neighborSize >= 30;
046                        }
047
048                        @Override
049                        public int getScore(int neighborSize, int upstrOnly, int dwstrOnly)
050                        {
051                                return neighborSize;
052                        }
053
054                        @Override
055                        public RelType getContext(int neighborSize, int upstrOnly, int dwstrOnly)
056                        {
057                                if (upstrOnly > 10 * dwstrOnly) return RelType.OUTPUT;
058                                else if (dwstrOnly > 10 * upstrOnly) return RelType.INPUT;
059                                else return null;
060                        }
061                });
062        }
063
064        /**
065         * Generates the blacklist.
066         * @param model model to use
067         * @return the blacklist
068         */
069        public Blacklist generateBlacklist(Model model)
070        {
071                ChemicalNameNormalizer normalizer = new ChemicalNameNormalizer(model);
072                SIFSearcher searcher = new SIFSearcher(new Fetcher(normalizer), SIFEnum.USED_TO_PRODUCE);
073
074                Set<SIFInteraction> sifs = searcher.searchSIF(model);
075
076                // read interactions into maps
077
078                Map<String, Set<String>> upstrMap = new HashMap<String, Set<String>>();
079                Map<String, Set<String>> dwstrMap = new HashMap<String, Set<String>>();
080                Map<String, Set<String>> neighMap = new HashMap<String, Set<String>>();
081
082                for (SIFInteraction sif : sifs)
083                {
084                        String source = sif.sourceID;
085                        String target = sif.targetID;
086
087                        if (!neighMap.containsKey(source)) neighMap.put(source, new HashSet<String>());
088                        if (!neighMap.containsKey(target)) neighMap.put(target, new HashSet<String>());
089                        if (!dwstrMap.containsKey(source)) dwstrMap.put(source, new HashSet<String>());
090                        if (!dwstrMap.containsKey(target)) dwstrMap.put(target, new HashSet<String>());
091                        if (!upstrMap.containsKey(source)) upstrMap.put(source, new HashSet<String>());
092                        if (!upstrMap.containsKey(target)) upstrMap.put(target, new HashSet<String>());
093
094                        neighMap.get(source).add(target);
095                        neighMap.get(target).add(source);
096                        dwstrMap.get(source).add(target);
097                        upstrMap.get(target).add(source);
098                }
099
100                // remove intersection of upstream and downstream
101
102                for (String name : neighMap.keySet())
103                {
104                        if (!upstrMap.containsKey(name) || !dwstrMap.containsKey(name)) continue;
105
106                        Set<String> upstr = upstrMap.get(name);
107                        Set<String> dwstr = dwstrMap.get(name);
108
109                        Set<String> temp = new HashSet<String>(upstr);
110                        upstr.removeAll(dwstr);
111                        dwstr.removeAll(temp);
112                }
113
114
115                Blacklist blacklist = new Blacklist();
116
117                // populate the blacklist
118
119                for (SmallMoleculeReference smr : model.getObjects(SmallMoleculeReference.class))
120                {
121                        String name = normalizer.getName(smr);
122
123                        int neighSize = neighMap.containsKey(name) ? neighMap.get(name).size() : 0;
124                        int upstrOnly = upstrMap.containsKey(name) ? upstrMap.get(name).size() : 0;
125                        int dwstrOnly = dwstrMap.containsKey(name) ? dwstrMap.get(name).size() : 0;
126
127                        if (decider.isUbique(neighSize, upstrOnly, dwstrOnly))
128                        {
129                                blacklist.addEntry(smr.getRDFId(),
130                                        decider.getScore(neighSize, upstrOnly, dwstrOnly),
131                                        decider.getContext(neighSize, upstrOnly, dwstrOnly));
132                        }
133                }
134
135                blacklist.write("blacklist.txt");
136
137                return blacklist;
138        }
139
140        /**
141         * Class to fetch the ID of the small molecule.
142         */
143        class Fetcher implements IDFetcher
144        {
145                ChemicalNameNormalizer normalizer;
146
147                Fetcher(ChemicalNameNormalizer normalizer)
148                {
149                        this.normalizer = normalizer;
150                }
151
152                @Override
153                public Set<String> fetchID(BioPAXElement ele)
154                {
155                        if (ele instanceof SmallMoleculeReference)
156                        {
157                                return Collections.singleton(normalizer.getName((SmallMoleculeReference) ele));
158                        }
159
160                        return null;
161                }
162        }
163
164        /**
165         * The class to decide if a molecule is ubique, its score and its context of ubiquity.
166         */
167        static interface Decider
168        {
169                /**
170                 * Tells if the molecule is ubique in at least one context.
171                 * @param neighborSize number of neighbors in the used-to-produce network
172                 * @param upstrOnly number of upstream neighbors in the used-to-produce network, that are not also at downstream
173                 * @param dwstrOnly number of downstream neighbors in the used-to-produce network, that are not also at upstream
174                 */
175                public boolean isUbique(int neighborSize, int upstrOnly, int dwstrOnly);
176
177                /**
178                 * Gets the ubiquity score of the ubique molecule. This score is used for comparing ubiques
179                 * and deciding the most essential reactants of a reaction if all reactants are ubique.
180                 * @param neighborSize number of neighbors in the used-to-produce network
181                 * @param upstrOnly number of upstream neighbors in the used-to-produce network, that are not also at downstream
182                 * @param dwstrOnly number of downstream neighbors in the used-to-produce network, that are not also at upstream
183                 */
184                public int getScore(int neighborSize, int upstrOnly, int dwstrOnly);
185
186                /**
187                 * Gets the context of ubiquity. A molecule can be ubiquitously consumed, or can be
188                 * ubiquitously produced, or both. When it is both, this method has to return null.
189                 * @param neighborSize number of neighbors in the used-to-produce network
190                 * @param upstrOnly number of upstream neighbors in the used-to-produce network, that are not also at downstream
191                 * @param dwstrOnly number of downstream neighbors in the used-to-produce network, that are not also at upstream
192                 */
193                public RelType getContext(int neighborSize, int upstrOnly, int dwstrOnly);
194        }
195
196}