From f1abcb56eab229b791cfb77385415fab2fa147cccb0c8beff2f22f01d8e22511 Mon Sep 17 00:00:00 2001 From: Alex Wied <543423+centromere@users.noreply.github.com> Date: Tue, 9 Jun 2026 20:09:16 -0400 Subject: [PATCH] . --- publish/src/app.rs | 57 ++++++++++++++++--------------- publish/src/rdf/ontology.rs | 67 ++++--------------------------------- search/src/index.rs | 25 ++++++++------ 3 files changed, 51 insertions(+), 98 deletions(-) diff --git a/publish/src/app.rs b/publish/src/app.rs index affcca7..413405a 100644 --- a/publish/src/app.rs +++ b/publish/src/app.rs @@ -19,12 +19,10 @@ use ldp::reqwest_middleware::{ClientBuilder, ClientWithMiddleware}; use ldp::traverse::Traverse; use ldp::{RdfSource, RdfSourceUpdateResponse, ResourceRequestBuilder, SerializationOptions}; use oxigraph::io::RdfFormat; -use oxigraph::model::vocab::rdf; +use oxigraph::model::vocab::{rdf, rdfs}; use oxigraph::model::{BaseDirection, Dataset, NamedNode, Quad, Term, TermRef}; use tracing::{debug, error}; -const ANNOTATED_IRI_TYPE: u64 = 0; - #[derive(Debug, Clone)] pub(crate) enum Message { None, @@ -106,25 +104,26 @@ impl Publisher { .build() .expect("Failed to build search index"); - index.remove_all_of_type(ANNOTATED_IRI_TYPE); - ontology.for_each_annotated_iri(|iri, label, comment| { - let mut document = doc!( - Schema::type_field() => ANNOTATED_IRI_TYPE, - Schema::iri_field() => iri, - ); + if let Some(id) = ontology.catalog_id(&rdf::PROPERTY.into_owned()) { index.remove_all_of_type(id); } + if let Some(id) = ontology.catalog_id(&rdfs::CLASS.into_owned()) { index.remove_all_of_type(id); } + for (iri, info) in ontology.iri_info() { + if let Some(type_) = ontology.catalog_id(&info.type_) { + let mut document = doc!( + Schema::type_field() => type_, + Schema::iri_field() => iri.as_str(), + ); - if let Some(label) = label { - document.add_text(Schema::field("label"), label.to_lowercase()); + if let Some(label) = &info.label { + document.add_text(Schema::field("label"), label.to_lowercase()); + } + + if let Some(comment) = &info.comment { + document.add_text(Schema::field("comment"), comment.to_lowercase()); + } + + index.add(document).expect("Unable to add annotated IRI to search index"); } - - if let Some(comment) = comment { - document.add_text(Schema::field("comment"), comment.to_lowercase()); - } - - index - .add(document) - .expect("Unable to add annotated IRI to search index"); - }); + } index.commit().expect("Unable to commit ontology to index"); let mut abbreviated_datatypes = ontology @@ -181,11 +180,8 @@ impl Publisher { .chain(Task::done(Message::CommitIndex)); } Message::IndexRdfSource(rdf_source) => { - let catalog_id = rdf_source - .classes() - .filter_map(|class| self.ontology.catalog_id(&class.into_owned())) - .next(); - if let Some(catalog_id) = catalog_id { + for catalog_id in rdf_source.classes() + .filter_map(|class| self.ontology.catalog_id(&class.into_owned())) { let mut document = SearchDocument::new(); document.add_u64(Schema::type_field(), catalog_id); document.add_text(Schema::iri_field(), rdf_source.origin()); @@ -322,9 +318,15 @@ impl Publisher { } Message::QueryUpdated(new_query) => { if let Some(search_state) = &mut self.search_state { + let type_ = match search_state.action { + SearchResultClickAction::Predicate(_) => self.ontology.catalog_id(&rdf::PROPERTY.into_owned()), + SearchResultClickAction::Object(_) => self.ontology.catalog_id(&rdfs::CLASS.into_owned()), + SearchResultClickAction::URLInput => None, + }; + search_state.results = self .index - .query(0, new_query.as_str(), Schema::all_fields()) + .query(type_, new_query.as_str(), Schema::all_fields()) .expect("Error encountered while querying index") .iter() .map(NamedNode::new_unchecked) @@ -676,7 +678,8 @@ impl Publisher { let header_text = self .ontology .info(result.as_ref()) - .map(|info| info.type_.to_string()) + .and_then(|info| self.ontology.info(info.type_.as_ref())) + .and_then(|info| info.label.clone()) .unwrap_or("Unknown".to_string()); button(text(header_text)) diff --git a/publish/src/rdf/ontology.rs b/publish/src/rdf/ontology.rs index c5d405b..5a798f5 100644 --- a/publish/src/rdf/ontology.rs +++ b/publish/src/rdf/ontology.rs @@ -8,7 +8,6 @@ use oxigraph::model::{ }; use oxigraph::sparql::{QueryResults, SparqlEvaluator}; use std::collections::HashMap; -use std::fmt::Display; const PREFIXES: &[(&str, &str)] = &[ ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), @@ -170,14 +169,7 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only { query.on_queryable_dataset(dataset).execute().unwrap() { for solution in solutions.filter_map(Result::ok) { - let resource_type = solution.get("class").and_then(term_to_named_node).and_then( - |class| match class.as_ref() { - rdf::PROPERTY => Some(ResourceType::Property), - rdfs::CLASS => Some(ResourceType::Class), - _ => None, - }, - ); - + let type_ = solution.get("class").and_then(term_to_named_node); let subject = solution.get("subject").and_then(term_to_named_node); let label = solution.get("label").and_then(term_to_string); let comment = solution.get("comment").and_then(term_to_string); @@ -186,11 +178,9 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only { .and_then(term_to_boolean) .unwrap_or(false); - if let Some(subject) = subject - && let Some(type_) = resource_type - { + if let Some(subject) = subject && let Some(type_) = type_ { let info = IriInformation { - type_, + type_: type_.clone(), label: label.map(String::from), comment: comment.map(String::from), read_only, @@ -253,25 +243,9 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only { } } -#[derive(Clone, Debug)] -pub enum ResourceType { - Property, - Class, -} - -impl Display for ResourceType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let str = match self { - ResourceType::Property => "Property".to_string(), - ResourceType::Class => "Class".to_string(), - }; - write!(f, "{}", str) - } -} - #[derive(Clone, Debug)] pub struct IriInformation { - pub type_: ResourceType, + pub type_: NamedNode, pub label: Option, pub comment: Option, pub read_only: bool, @@ -358,38 +332,9 @@ impl Ontology { self.catalog_ids.get(class).copied() } - pub fn for_each_annotated_iri(&self, f: F) - where - F: Fn(&str, Option<&str>, Option<&str>), + pub fn iri_info(&self) -> &HashMap { - let query = SparqlEvaluator::new() - .parse_query( - r"PREFIX rdfs: - -SELECT ?subject ?label ?comment WHERE { - OPTIONAL { - ?subject rdfs:label ?label . - FILTER (LANG(?label) = 'en' || LANG(?label) = '') - } - OPTIONAL { - ?subject rdfs:comment ?comment . - FILTER (LANG(?comment) = 'en' || LANG(?comment) = '') - } -}", - ) - .expect("Unable to parse annotation query"); - - if let QueryResults::Solutions(solutions) = - query.on_queryable_dataset(&self.dataset).execute().unwrap() - { - for solution in solutions.filter_map(Result::ok) { - let label = solution.get("label").and_then(term_to_string); - let comment = solution.get("comment").and_then(term_to_string); - if let Some(Term::NamedNode(subject)) = solution.get("subject") { - f(subject.as_str(), label.as_deref(), comment.as_deref()); - } - } - } + &self.iri_info } pub fn datatypes(&self) -> impl Iterator> { diff --git a/search/src/index.rs b/search/src/index.rs index 6bc18db..5305b77 100644 --- a/search/src/index.rs +++ b/search/src/index.rs @@ -4,8 +4,8 @@ use crate::schema::Schema; use std::path::PathBuf; use tantivy::collector::TopDocs; use tantivy::directory::{ManagedDirectory, MmapDirectory}; -use tantivy::query::{BooleanQuery, Occur, QueryParser}; -use tantivy::schema::{Field, Value}; +use tantivy::query::{BooleanQuery, Occur, QueryParser, TermQuery}; +use tantivy::schema::{Field, IndexRecordOption, Value}; use tantivy::tokenizer::{NgramTokenizer, TokenizerManager}; use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term}; @@ -79,19 +79,24 @@ impl SearchIndex { pub fn query( &self, - _type_: u64, + type_: Option, user_query: &str, default_fields: Vec, ) -> error::Result> { - //let doc_type_term = Term::from_field_u64(Schema::type_field(), type_); - //let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic)); - let parser = QueryParser::for_index(&self.index, default_fields); let (user_query, _) = parser.parse_query_lenient(user_query); - let query = BooleanQuery::new(vec![ - //(Occur::Must, doc_type_query), - (Occur::Must, user_query), - ]); + + let mut subqueries = vec![ + (Occur::Must, user_query) + ]; + + if let Some(type_) = type_ { + let doc_type_term = Term::from_field_u64(Schema::type_field(), type_); + let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic)); + subqueries.push((Occur::Must, doc_type_query)); + } + + let query = BooleanQuery::new(subqueries); let searcher = self.reader.searcher(); let results = searcher.search(&query, &TopDocs::with_limit(10000).order_by_score())?; let mut iris = vec![];