SHA256
1
0
This commit is contained in:
Alex Wied
2026-06-09 20:09:16 -04:00
parent b53a3ad1e0
commit f1abcb56ea
3 changed files with 51 additions and 98 deletions
+23 -20
View File
@@ -19,12 +19,10 @@ use ldp::reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use ldp::traverse::Traverse;
use ldp::{RdfSource, RdfSourceUpdateResponse, ResourceRequestBuilder, SerializationOptions};
use oxigraph::io::RdfFormat;
use oxigraph::model::vocab::rdf;
use oxigraph::model::vocab::{rdf, rdfs};
use oxigraph::model::{BaseDirection, Dataset, NamedNode, Quad, Term, TermRef};
use tracing::{debug, error};
const ANNOTATED_IRI_TYPE: u64 = 0;
#[derive(Debug, Clone)]
pub(crate) enum Message {
None,
@@ -106,25 +104,26 @@ impl Publisher {
.build()
.expect("Failed to build search index");
index.remove_all_of_type(ANNOTATED_IRI_TYPE);
ontology.for_each_annotated_iri(|iri, label, comment| {
if let Some(id) = ontology.catalog_id(&rdf::PROPERTY.into_owned()) { index.remove_all_of_type(id); }
if let Some(id) = ontology.catalog_id(&rdfs::CLASS.into_owned()) { index.remove_all_of_type(id); }
for (iri, info) in ontology.iri_info() {
if let Some(type_) = ontology.catalog_id(&info.type_) {
let mut document = doc!(
Schema::type_field() => ANNOTATED_IRI_TYPE,
Schema::iri_field() => iri,
Schema::type_field() => type_,
Schema::iri_field() => iri.as_str(),
);
if let Some(label) = label {
if let Some(label) = &info.label {
document.add_text(Schema::field("label"), label.to_lowercase());
}
if let Some(comment) = comment {
if let Some(comment) = &info.comment {
document.add_text(Schema::field("comment"), comment.to_lowercase());
}
index
.add(document)
.expect("Unable to add annotated IRI to search index");
});
index.add(document).expect("Unable to add annotated IRI to search index");
}
}
index.commit().expect("Unable to commit ontology to index");
let mut abbreviated_datatypes = ontology
@@ -181,11 +180,8 @@ impl Publisher {
.chain(Task::done(Message::CommitIndex));
}
Message::IndexRdfSource(rdf_source) => {
let catalog_id = rdf_source
.classes()
.filter_map(|class| self.ontology.catalog_id(&class.into_owned()))
.next();
if let Some(catalog_id) = catalog_id {
for catalog_id in rdf_source.classes()
.filter_map(|class| self.ontology.catalog_id(&class.into_owned())) {
let mut document = SearchDocument::new();
document.add_u64(Schema::type_field(), catalog_id);
document.add_text(Schema::iri_field(), rdf_source.origin());
@@ -322,9 +318,15 @@ impl Publisher {
}
Message::QueryUpdated(new_query) => {
if let Some(search_state) = &mut self.search_state {
let type_ = match search_state.action {
SearchResultClickAction::Predicate(_) => self.ontology.catalog_id(&rdf::PROPERTY.into_owned()),
SearchResultClickAction::Object(_) => self.ontology.catalog_id(&rdfs::CLASS.into_owned()),
SearchResultClickAction::URLInput => None,
};
search_state.results = self
.index
.query(0, new_query.as_str(), Schema::all_fields())
.query(type_, new_query.as_str(), Schema::all_fields())
.expect("Error encountered while querying index")
.iter()
.map(NamedNode::new_unchecked)
@@ -676,7 +678,8 @@ impl Publisher {
let header_text = self
.ontology
.info(result.as_ref())
.map(|info| info.type_.to_string())
.and_then(|info| self.ontology.info(info.type_.as_ref()))
.and_then(|info| info.label.clone())
.unwrap_or("Unknown".to_string());
button(text(header_text))
+6 -61
View File
@@ -8,7 +8,6 @@ use oxigraph::model::{
};
use oxigraph::sparql::{QueryResults, SparqlEvaluator};
use std::collections::HashMap;
use std::fmt::Display;
const PREFIXES: &[(&str, &str)] = &[
("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
@@ -170,14 +169,7 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only {
query.on_queryable_dataset(dataset).execute().unwrap()
{
for solution in solutions.filter_map(Result::ok) {
let resource_type = solution.get("class").and_then(term_to_named_node).and_then(
|class| match class.as_ref() {
rdf::PROPERTY => Some(ResourceType::Property),
rdfs::CLASS => Some(ResourceType::Class),
_ => None,
},
);
let type_ = solution.get("class").and_then(term_to_named_node);
let subject = solution.get("subject").and_then(term_to_named_node);
let label = solution.get("label").and_then(term_to_string);
let comment = solution.get("comment").and_then(term_to_string);
@@ -186,11 +178,9 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only {
.and_then(term_to_boolean)
.unwrap_or(false);
if let Some(subject) = subject
&& let Some(type_) = resource_type
{
if let Some(subject) = subject && let Some(type_) = type_ {
let info = IriInformation {
type_,
type_: type_.clone(),
label: label.map(String::from),
comment: comment.map(String::from),
read_only,
@@ -253,25 +243,9 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only {
}
}
/// Kind of ontology resource that an IRI is classified as.
#[derive(Clone, Debug)]
pub enum ResourceType {
    /// The resource is a property.
    Property,
    /// The resource is a class.
    Class,
}

impl Display for ResourceType {
    /// Writes the variant's name (`"Property"` or `"Class"`) to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The names are static, so write them directly instead of building a
        // temporary `String` for every call.
        f.write_str(match self {
            ResourceType::Property => "Property",
            ResourceType::Class => "Class",
        })
    }
}
#[derive(Clone, Debug)]
pub struct IriInformation {
pub type_: ResourceType,
pub type_: NamedNode,
pub label: Option<String>,
pub comment: Option<String>,
pub read_only: bool,
@@ -358,38 +332,9 @@ impl Ontology {
self.catalog_ids.get(class).copied()
}
pub fn for_each_annotated_iri<F>(&self, f: F)
where
F: Fn(&str, Option<&str>, Option<&str>),
pub fn iri_info(&self) -> &HashMap<NamedNode, IriInformation>
{
let query = SparqlEvaluator::new()
.parse_query(
r"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?subject ?label ?comment WHERE {
OPTIONAL {
?subject rdfs:label ?label .
FILTER (LANG(?label) = 'en' || LANG(?label) = '')
}
OPTIONAL {
?subject rdfs:comment ?comment .
FILTER (LANG(?comment) = 'en' || LANG(?comment) = '')
}
}",
)
.expect("Unable to parse annotation query");
if let QueryResults::Solutions(solutions) =
query.on_queryable_dataset(&self.dataset).execute().unwrap()
{
for solution in solutions.filter_map(Result::ok) {
let label = solution.get("label").and_then(term_to_string);
let comment = solution.get("comment").and_then(term_to_string);
if let Some(Term::NamedNode(subject)) = solution.get("subject") {
f(subject.as_str(), label.as_deref(), comment.as_deref());
}
}
}
&self.iri_info
}
pub fn datatypes(&self) -> impl Iterator<Item = NamedNodeRef<'_>> {
+15 -10
View File
@@ -4,8 +4,8 @@ use crate::schema::Schema;
use std::path::PathBuf;
use tantivy::collector::TopDocs;
use tantivy::directory::{ManagedDirectory, MmapDirectory};
use tantivy::query::{BooleanQuery, Occur, QueryParser};
use tantivy::schema::{Field, Value};
use tantivy::query::{BooleanQuery, Occur, QueryParser, TermQuery};
use tantivy::schema::{Field, IndexRecordOption, Value};
use tantivy::tokenizer::{NgramTokenizer, TokenizerManager};
use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term};
@@ -79,19 +79,24 @@ impl SearchIndex {
pub fn query(
&self,
_type_: u64,
type_: Option<u64>,
user_query: &str,
default_fields: Vec<Field>,
) -> error::Result<Vec<String>> {
//let doc_type_term = Term::from_field_u64(Schema::type_field(), type_);
//let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic));
let parser = QueryParser::for_index(&self.index, default_fields);
let (user_query, _) = parser.parse_query_lenient(user_query);
let query = BooleanQuery::new(vec![
//(Occur::Must, doc_type_query),
(Occur::Must, user_query),
]);
let mut subqueries = vec![
(Occur::Must, user_query)
];
if let Some(type_) = type_ {
let doc_type_term = Term::from_field_u64(Schema::type_field(), type_);
let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic));
subqueries.push((Occur::Must, doc_type_query));
}
let query = BooleanQuery::new(subqueries);
let searcher = self.reader.searcher();
let results = searcher.search(&query, &TopDocs::with_limit(10000).order_by_score())?;
let mut iris = vec![];