-
Notifications
You must be signed in to change notification settings - Fork 8
Fixes #519: Knowledge Gap bug with semi-duplicate triples #594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,15 +2,13 @@ | |
|
|
||
| import java.io.IOException; | ||
| import java.time.Instant; | ||
| import java.util.Collections; | ||
| import java.util.HashSet; | ||
| import java.util.Iterator; | ||
| import java.util.Map; | ||
| import java.util.*; | ||
| import java.util.Map.Entry; | ||
| import java.util.Set; | ||
| import java.util.concurrent.CompletableFuture; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import eu.knowledge.engine.reasoner.api.TripleNode; | ||
| import org.apache.jena.graph.Node; | ||
| import org.apache.jena.sparql.core.TriplePath; | ||
| import org.apache.jena.sparql.core.Var; | ||
|
|
@@ -668,21 +666,21 @@ public Set<KnowledgeGap> getKnowledgeGaps(RuleNode plan) { | |
|
|
||
| assert plan instanceof AntSide; | ||
|
|
||
| Set<KnowledgeGap> existingOrGaps = new HashSet<KnowledgeGap>(); | ||
| Set<KnowledgeGap> existingOrGaps = new HashSet<>(); | ||
|
|
||
| // TODO do we need to include the parent if we are not backward chaining? | ||
| Map<TriplePattern, Set<RuleNode>> nodeCoverage = plan | ||
| .findAntecedentCoverage(((AntSide) plan).getAntecedentNeighbours()); | ||
|
|
||
| // collect triple patterns that have an empty set | ||
| Set<KnowledgeGap> collectedOrGaps, someGaps = new HashSet<KnowledgeGap>(); | ||
| Set<KnowledgeGap> collectedOrGaps, someGaps = new HashSet<>(); | ||
|
|
||
| for (Entry<TriplePattern, Set<RuleNode>> entry : nodeCoverage.entrySet()) { | ||
|
|
||
| LOG.debug("Entry key is {}", entry.getKey()); | ||
| LOG.debug("Entry value is {}", entry.getValue()); | ||
|
|
||
| collectedOrGaps = new HashSet<KnowledgeGap>(); | ||
| collectedOrGaps = new HashSet<>(); | ||
| boolean foundNeighborWithoutGap = false; | ||
| for (RuleNode neighbor : entry.getValue()) { | ||
| LOG.debug("Neighbor is {}", neighbor); | ||
|
|
@@ -711,40 +709,110 @@ public Set<KnowledgeGap> getKnowledgeGaps(RuleNode plan) { | |
| } | ||
| LOG.debug("Found a neighbor without gaps is {}", foundNeighborWithoutGap); | ||
|
|
||
| if (!foundNeighborWithoutGap) { | ||
| // there is a gap here, either in the current node or in a neighbor. | ||
| if (foundNeighborWithoutGap) continue; | ||
|
|
||
| if (collectedOrGaps.isEmpty()) { | ||
| KnowledgeGap kg = new KnowledgeGap(); | ||
| kg.add(entry.getKey()); | ||
| collectedOrGaps.add(kg); | ||
| } | ||
| LOG.debug("CollectedOrGaps is {}", collectedOrGaps); | ||
|
|
||
| Set<KnowledgeGap> newExistingOrGaps = new HashSet<KnowledgeGap>(); | ||
| if (existingOrGaps.isEmpty()) { | ||
| existingOrGaps.addAll(collectedOrGaps); | ||
| LOG.debug("Added collectedOrGaps to existingOrGaps"); | ||
| } else { | ||
| KnowledgeGap newGap; | ||
| for (KnowledgeGap existingOrGap : existingOrGaps) { | ||
| for (KnowledgeGap collectedOrGap : collectedOrGaps) { | ||
| newGap = new KnowledgeGap(); | ||
| newGap.addAll(existingOrGap); | ||
| newGap.addAll(collectedOrGap); | ||
| LOG.debug("Found newGap {}", newGap); | ||
| newExistingOrGaps.add(newGap); | ||
| } | ||
| } | ||
| existingOrGaps = newExistingOrGaps; | ||
| } | ||
| // there is a gap here, either in the current node or in a neighbor | ||
|
|
||
| if (collectedOrGaps.isEmpty()) { | ||
| KnowledgeGap kg = new KnowledgeGap(); | ||
| kg.add(entry.getKey()); | ||
| collectedOrGaps.add(kg); | ||
| } | ||
| LOG.debug("CollectedOrGaps is {}", collectedOrGaps); | ||
|
|
||
| existingOrGaps = mergeGaps(existingOrGaps, collectedOrGaps); | ||
| } | ||
| LOG.debug("Found existingOrGaps {}", existingOrGaps); | ||
| return existingOrGaps; | ||
| } | ||
|
|
||
| /** | ||
| * Given two triples, decides how they should be combined into a (single) knowledge gap | ||
| * | ||
| * @param existingTriple First triple | ||
| * @param newTriple Second triple | ||
| * @return Action that should be taken when merging these two triples in a knowledge gap | ||
|
Sophietje marked this conversation as resolved.
|
||
| */ | ||
| private static TripleMatchType getTripleMatchType(TriplePattern existingTriple, TriplePattern newTriple) { | ||
| Map<TripleNode, TripleNode> matches = existingTriple.findMatches(newTriple); | ||
| if (matches == null) { | ||
| return TripleMatchType.ADD_TRIPLE; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I always try to have only a single
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this specific case I think I disagree. I'd prefer a clear |
||
| } | ||
|
|
||
| ArrayList<TripleMatchType> matchType = new ArrayList<>(); | ||
|
Sophietje marked this conversation as resolved.
|
||
| for (Entry<TripleNode, TripleNode> match : matches.entrySet()) { | ||
| if (match.getKey().node.isVariable() && match.getValue().node.isConcrete()) { | ||
| matchType.add(TripleMatchType.REPLACE_TRIPLE); | ||
| } else if (match.getKey().node.isConcrete() && match.getValue().node.isVariable()) { | ||
| matchType.add(TripleMatchType.IGNORE_TRIPLE); | ||
| } | ||
| } | ||
|
|
||
| TripleMatchType result; | ||
| if (matchType.isEmpty()) { | ||
| result = TripleMatchType.IGNORE_TRIPLE; | ||
| } else if (matchType.stream().allMatch(m -> m.equals(matchType.get(0)))) { | ||
| result = matchType.get(0); | ||
| } else { | ||
| result = TripleMatchType.ADD_TRIPLE; | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| /** | ||
| * Merges two Knowledge Gaps into one Knowledge Gap | ||
| * @param gap First knowledge gap | ||
| * @param gapToAdd Knowledge gap that should be added/merged into the first knowledge gap | ||
| * @return one Knowledge Gap containing triples from both provided knowledge gaps, ignoring triples that | ||
| * are duplicates or more generic (e.g. {@code ?id hasCountry ?c} is ignored if there is already | ||
| * {@code ?id hasCountry Ireland}) | ||
| */ | ||
| private static KnowledgeGap mergeGap(KnowledgeGap gap, KnowledgeGap gapToAdd) { | ||
| KnowledgeGap result = new KnowledgeGap(gap); | ||
| for (TriplePattern tripleToAdd : gapToAdd) { | ||
| Map<TriplePattern, TripleMatchType> tripleActions = new HashMap<>(); | ||
| for (TriplePattern existingTriple : gap) { | ||
| TripleMatchType tripleAction = getTripleMatchType(existingTriple, tripleToAdd); | ||
| tripleActions.put(existingTriple, tripleAction); | ||
| } | ||
| Set<TriplePattern> replaces = tripleActions.entrySet().stream().filter(t -> | ||
| t.getValue() == TripleMatchType.REPLACE_TRIPLE).map(Map.Entry::getKey).collect(Collectors.toSet()); | ||
|
|
||
| if (replaces.size() == 1) { | ||
| TriplePattern toReplace = replaces.iterator().next(); | ||
| result.remove(toReplace); | ||
| result.add(tripleToAdd); | ||
| } else if (!tripleActions.containsValue(TripleMatchType.IGNORE_TRIPLE)) { | ||
| result.add(tripleToAdd); | ||
| } | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| /** | ||
| * Given multiple knowledge gaps, adds the gaps in gapstoAdd to all knowledge gaps in listOfGaps | ||
| * @param listOfGaps list of knowledge gaps to which new knowledge gaps should be added | ||
| * @param gapsToAdd knowledge gaps that should be added to the gaps in listOfGaps | ||
| * @return Set of KnowledgeGaps where knowledge gaps in gapsToAdd have been merged/added to the gaps in listOfGaps | ||
| */ | ||
| private static Set<KnowledgeGap> mergeGaps(Set<KnowledgeGap> listOfGaps, Set<KnowledgeGap> gapsToAdd) { | ||
| if (listOfGaps.isEmpty()) { | ||
| return gapsToAdd; | ||
| } else if (gapsToAdd.isEmpty()) { | ||
| return listOfGaps; | ||
| } | ||
|
|
||
| Set<KnowledgeGap> knowledgeGaps = new HashSet<>(); | ||
| for (KnowledgeGap existingGap : listOfGaps) { | ||
| for (KnowledgeGap gapToAdd : gapsToAdd) { | ||
| KnowledgeGap g = mergeGap(existingGap, gapToAdd); | ||
| knowledgeGaps.add(g); | ||
| } | ||
| } | ||
|
|
||
| return knowledgeGaps; | ||
| } | ||
|
|
||
| private boolean isMetaKI(RuleNode neighbor) { | ||
|
|
||
| assert neighbor.getRule() instanceof Rule; | ||
|
|
@@ -765,4 +833,13 @@ private boolean isMetaKI(RuleNode neighbor) { | |
| public void setMatchStrategy(MatchStrategy aStrategy) { | ||
| this.matchStrategy = aStrategy; | ||
| } | ||
|
|
||
| /** | ||
| * Enum that represents which action to take for a triple when merging two knowledge gaps | ||
| * Given a triple, compared to another triple, the first triple can either be | ||
| * (1) Added to the Knowledge Gap, (2) Ignored, or (3) Replace the other triple | ||
| */ | ||
| private enum TripleMatchType { | ||
| ADD_TRIPLE, IGNORE_TRIPLE, REPLACE_TRIPLE | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Sophietje Do you remember why this change was necessary? Because the removed line and the added line do not check the same thing. I vaguely remember looking at this together, but do not remember the reason.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't quite remember. If I change it back to
keySet(), thetestKnowledgeGapNoMatchingVars()exits with the knowledge gap:[?iq hasImage ?image, ?iq type ImperialFaberge, ?iq madeIn ?country, ?iq madeBy ?company]whereas we originally defined the expected knowledge gap to be:[?id madeIn Russia, ?id commissionedBy Alexander III, ?id madeBy House of Fabergé]