From b53a3ad1e06917f2cd867013357574fe345a1e1eabfa06caf06c1a4e08f0abdb Mon Sep 17 00:00:00 2001 From: Alex Wied <543423+centromere@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:25:47 -0400 Subject: [PATCH] . --- publish/src/app.rs | 57 +++++-- publish/src/rdf/ontologies/ontology.ttl | 3 +- publish/src/rdf/ontology.rs | 192 +++++++++++++----------- publish/src/rdf/vocab.rs | 12 -- search/src/index.rs | 8 +- 5 files changed, 155 insertions(+), 117 deletions(-) diff --git a/publish/src/app.rs b/publish/src/app.rs index eed9633..affcca7 100644 --- a/publish/src/app.rs +++ b/publish/src/app.rs @@ -499,9 +499,9 @@ impl Publisher { let property_label = self .ontology - .label(triple.predicate.as_ref()) - .unwrap_or(self.ontology.abbreviate(triple.predicate.as_ref()).as_str()) - .to_string(); + .info(triple.predicate.as_ref()) + .and_then(|info| info.label.clone()) + .unwrap_or(self.ontology.abbreviate(triple.predicate.as_ref())); let property: Element = if state.read_only { text(property_label).into() @@ -518,7 +518,8 @@ impl Publisher { let value_label = term.value_as_named_node().and_then(|node| { self.ontology - .label(node) + .info(node) + .and_then(|info| info.label.clone()) .map(|label| container(text(label))) }); @@ -639,8 +640,8 @@ impl Publisher { let buttons = entities.map(|entity| { let label = self .ontology - .label(entity.as_ref()) - .map(|label| label.to_string()) + .info(entity.as_ref()) + .and_then(|info| info.label.clone()) .unwrap_or(entity.as_str().to_string()); button(text(label)) .on_press(Message::NewDocument(entity)) @@ -659,19 +660,55 @@ impl Publisher { let search_input = text_input("Query", &search_state.query).on_input(Message::QueryUpdated); - let type_column = table::column("Type", |result: &NamedNode| { - let label = self.ontology.label(result.as_ref()).unwrap_or("Unknown"); + let label_column = table::column("Label", |result: &NamedNode| { + let header_text = self + .ontology + .info(result.as_ref()) + .and_then(|info| info.label.clone()) + .unwrap_or("".to_string()); - button(text(label)) + button(text(header_text)) .on_press(Message::SearchResultClicked(result.clone())) .style(button::text) }); + + let type_column = table::column("Type", |result: &NamedNode| { + let header_text = self + .ontology + .info(result.as_ref()) + .map(|info| info.type_.to_string()) + .unwrap_or("Unknown".to_string()); + + button(text(header_text)) + .on_press(Message::SearchResultClicked(result.clone())) + .style(button::text) + }); + + /*let description_column = table::column("Description", |result: &NamedNode| { + let header_text = self.ontology.info(result.as_ref()) + .and_then(|info| info.comment.clone()) + .unwrap_or("Unknown".to_string()); + + button(text(header_text)) + .on_press(Message::SearchResultClicked(result.clone())) + .style(button::text) + });*/ + let iri_column = table::column("IRI", |result: &NamedNode| { button(text(result.as_str())) .on_press(Message::SearchResultClicked(result.clone())) .style(button::text) }); - let results_table = scrollable(table([type_column, iri_column], &search_state.results)); + + let results_table = scrollable(table( + [ + label_column, + //description_column, + type_column, + iri_column, + ], + &search_state.results, + )); return column![search_input, results_table].into(); } diff --git a/publish/src/rdf/ontologies/ontology.ttl b/publish/src/rdf/ontologies/ontology.ttl index 5e6fdee..9cfcc40 100644 --- a/publish/src/rdf/ontologies/ontology.ttl +++ b/publish/src/rdf/ontologies/ontology.ttl @@ -89,7 +89,8 @@ xsd:integer rdf:type rdfs:Datatype ; ### http://www.w3.org/2001/XMLSchema#nonNegativeInteger -xsd:nonNegativeInteger rdfs:label "Non-negative Integer"@en . +xsd:nonNegativeInteger rdf:type rdfs:Datatype ; + rdfs:label "Non-negative Integer"@en . ### http://www.w3.org/2001/XMLSchema#string diff --git a/publish/src/rdf/ontology.rs b/publish/src/rdf/ontology.rs index 0310f68..c5d405b 100644 --- a/publish/src/rdf/ontology.rs +++ b/publish/src/rdf/ontology.rs @@ -3,12 +3,12 @@ use crate::rdf::vocab::{gl, owl}; use oxigraph::io::{RdfFormat, RdfParser}; use oxigraph::model::vocab::{rdf, rdfs, xsd}; use oxigraph::model::{ - Dataset, NamedNode, NamedNodeRef, NamedOrBlankNode, NamedOrBlankNodeRef, Term, TermRef, Triple, - TripleRef, + Dataset, GraphName, NamedNode, NamedNodeRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, Term, + TermRef, Triple, TripleRef, }; use oxigraph::sparql::{QueryResults, SparqlEvaluator}; -use std::collections::{HashMap, HashSet}; -use tracing::debug; +use std::collections::HashMap; +use std::fmt::Display; const PREFIXES: &[(&str, &str)] = &[ ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), @@ -106,37 +106,62 @@ impl<'a> OntologyBuilder<'a> { } } - fn lookup_iri_information<'b>( - dataset: &Dataset, - classes: impl IntoIterator>, - ) -> HashMap { - let class_filter = classes - .into_iter() - .map(|node| node.to_string()) - .collect::>() - .join(","); - + fn materialize_super_classes(dataset: &mut Dataset) { let query = SparqlEvaluator::new() .parse_query( - format!( - r#"PREFIX gl: -PREFIX rdfs: + r#"PREFIX rdfs: -SELECT DISTINCT ?subject ?label ?comment ?read_only {{ +CONSTRUCT { + ?item a ?parent +} WHERE { + ?item a ?class . + ?class rdfs:subClassOf ?parent . +}"#, + ) + .expect("Unable to parse superclass query"); + + let mut additional_quads = Dataset::new(); + if let QueryResults::Graph(graph) = query.on_queryable_dataset(&*dataset).execute().unwrap() + { + additional_quads.extend(graph.filter_map(Result::ok).map(|triple| { + Quad::new( + triple.subject, + triple.predicate, + triple.object, + GraphName::DefaultGraph, + ) + })); + } + + let old_size = dataset.len(); + dataset.extend(&additional_quads); + let new_size = dataset.len(); + + if new_size > old_size { + Self::materialize_super_classes(dataset) + } + } + + fn iri_information(dataset: &Dataset) -> HashMap { + let query = SparqlEvaluator::new() + .parse_query( + r#"PREFIX rdf: +PREFIX rdfs: +PREFIX gl: + +SELECT DISTINCT ?class ?subject ?label ?comment ?read_only { + VALUES ?class { rdf:Property rdfs:Class } ?subject a ?class . - FILTER(?class IN ({class_filter})) - OPTIONAL {{ + OPTIONAL { ?subject rdfs:label ?label FILTER (LANG(?label) = 'en' || LANG(?label) = '') - }} - OPTIONAL {{ + } + OPTIONAL { ?subject rdfs:comment ?comment FILTER (LANG(?comment) = 'en' || LANG(?comment) = '') - }} - OPTIONAL {{ ?subject gl:readOnly ?read_only }} -}}"# - ) - .as_str(), + } + OPTIONAL { ?subject gl:readOnly ?read_only } +}"#, ) .expect("Unable to parse property query"); @@ -145,6 +170,14 @@ SELECT DISTINCT ?subject ?label ?comment ?read_only {{ query.on_queryable_dataset(dataset).execute().unwrap() { for solution in solutions.filter_map(Result::ok) { + let resource_type = solution.get("class").and_then(term_to_named_node).and_then( + |class| match class.as_ref() { + rdf::PROPERTY => Some(ResourceType::Property), + rdfs::CLASS => Some(ResourceType::Class), + _ => None, + }, + ); + let subject = solution.get("subject").and_then(term_to_named_node); let label = solution.get("label").and_then(term_to_string); let comment = solution.get("comment").and_then(term_to_string); @@ -153,8 +186,11 @@ SELECT DISTINCT ?subject ?label ?comment ?read_only {{ .and_then(term_to_boolean) .unwrap_or(false); - if let Some(subject) = subject { + if let Some(subject) = subject + && let Some(type_) = resource_type + { let info = IriInformation { + type_, label: label.map(String::from), comment: comment.map(String::from), read_only, @@ -180,21 +216,9 @@ SELECT DISTINCT ?subject ?label ?comment ?read_only {{ dataset.extend(quads); } Self::materialize_same_as(&mut dataset); + Self::materialize_super_classes(&mut dataset); - let properties = Self::lookup_iri_information( - &dataset, - [ - rdf::PROPERTY, - rdfs::DATATYPE, - owl::ANNOTATION_PROPERTY, - owl::DATATYPE_PROPERTY, - owl::FUNCTIONAL_PROPERTY, - owl::OBJECT_PROPERTY, - owl::ONTOLOGY_PROPERTY, - ], - ); - - let classes = Self::lookup_iri_information(&dataset, [rdfs::CLASS, owl::CLASS]); + let iri_info = Self::iri_information(&mut dataset); // Full-text search index field names let mut field_map = HashMap::new(); @@ -222,8 +246,7 @@ SELECT DISTINCT ?subject ?label ?comment ?read_only {{ Ok(Ontology { dataset, prefixes, - properties, - classes, + iri_info, field_map, catalog_ids, }) @@ -231,17 +254,33 @@ SELECT DISTINCT ?subject ?label ?comment ?read_only {{ } #[derive(Clone, Debug)] -struct IriInformation { - label: Option, - comment: Option, - read_only: bool, +pub enum ResourceType { + Property, + Class, +} + +impl Display for ResourceType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let str = match self { + ResourceType::Property => "Property".to_string(), + ResourceType::Class => "Class".to_string(), + }; + write!(f, "{}", str) + } +} + +#[derive(Clone, Debug)] +pub struct IriInformation { + pub type_: ResourceType, + pub label: Option, + pub comment: Option, + pub read_only: bool, } pub struct Ontology { dataset: Dataset, prefixes: HashMap, - properties: HashMap, - classes: HashMap, + iri_info: HashMap, field_map: HashMap, catalog_ids: HashMap, } @@ -286,18 +325,9 @@ impl Ontology { } } - pub fn label<'a>(&'a self, node: NamedNodeRef<'a>) -> Option<&'a str> { + pub fn info(&self, node: NamedNodeRef<'_>) -> Option<&IriInformation> { let node = node.into_owned(); - self.classes - .get(&node) - .map(|info| info.label.as_deref()) - .flatten() - .or_else(|| { - self.properties - .get(&node) - .map(|info| info.label.as_deref()) - .flatten() - }) + self.iri_info.get(&node) } pub fn abbreviate(&self, node: NamedNodeRef<'_>) -> String { @@ -378,41 +408,23 @@ SELECT ?subject ?label ?comment WHERE { pub fn is_read_only<'a>(&self, triple: impl Into>) -> bool { let triple = triple.into(); - let read_only_property = self - .properties + self.iri_info .get(&triple.predicate.into_owned()) .map(|info| info.read_only) - .unwrap_or(false); - - let read_only_class = match (triple.predicate, triple.object) { - (rdf::TYPE, TermRef::NamedNode(node)) => self - .classes - .get(&node.into_owned()) - .map(|info| info.read_only) - .unwrap_or(false), - _ => false, - }; - - read_only_property || read_only_class + .unwrap_or(false) } pub fn exclude_read_only_predicate(&self) -> impl Fn(TripleRef<'_>) -> bool + 'static { - let classes = self.classes.clone(); - let properties = self.properties.clone(); - move |triple| { - let read_only_property = properties - .get(&triple.predicate.into_owned()) + let iri_info = self.iri_info.clone(); + move |triple| match (triple.predicate, triple.object) { + (rdf::TYPE, TermRef::NamedNode(node)) => iri_info + .get(&node.into_owned()) .map(|info| info.read_only) - .unwrap_or(false); - - let read_only_class = match (triple.predicate, triple.object) { - (rdf::TYPE, TermRef::NamedNode(node)) => classes - .get(&node.into_owned()) - .map(|info| info.read_only) - .unwrap_or(false), - _ => false, - }; - !(read_only_property || read_only_class) + .unwrap_or(false), + (predicate, _) => iri_info + .get(&predicate.into_owned()) + .map(|info| info.read_only) + .unwrap_or(false), } } diff --git a/publish/src/rdf/vocab.rs b/publish/src/rdf/vocab.rs index 688e4eb..e4efd91 100644 --- a/publish/src/rdf/vocab.rs +++ b/publish/src/rdf/vocab.rs @@ -17,18 +17,6 @@ pub mod owl { pub const SAME_AS: NamedNodeRef = NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#sameAs"); - pub const CLASS: NamedNodeRef = - NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#Class"); - pub const OBJECT_PROPERTY: NamedNodeRef = - NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#ObjectProperty"); - pub const DATATYPE_PROPERTY: NamedNodeRef = - NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#DatatypeProperty"); - pub const ANNOTATION_PROPERTY: NamedNodeRef = - NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#AnnotationProperty"); - pub const FUNCTIONAL_PROPERTY: NamedNodeRef = - NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#FunctionalProperty"); - pub const ONTOLOGY_PROPERTY: NamedNodeRef = - NamedNodeRef::new_unchecked("http://www.w3.org/2002/07/owl#OntologyProperty"); } pub mod rda { diff --git a/search/src/index.rs b/search/src/index.rs index 9ee8d63..6bc18db 100644 --- a/search/src/index.rs +++ b/search/src/index.rs @@ -4,8 +4,8 @@ use crate::schema::Schema; use std::path::PathBuf; use tantivy::collector::TopDocs; use tantivy::directory::{ManagedDirectory, MmapDirectory}; -use tantivy::query::{BooleanQuery, Occur, QueryParser, TermQuery}; -use tantivy::schema::{Field, IndexRecordOption, Value}; +use tantivy::query::{BooleanQuery, Occur, QueryParser}; +use tantivy::schema::{Field, Value}; use tantivy::tokenizer::{NgramTokenizer, TokenizerManager}; use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term}; @@ -79,11 +79,11 @@ impl SearchIndex { pub fn query( &self, - type_: u64, + _type_: u64, user_query: &str, default_fields: Vec, ) -> error::Result> { - let doc_type_term = Term::from_field_u64(Schema::type_field(), type_); + //let doc_type_term = Term::from_field_u64(Schema::type_field(), type_); //let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic)); let parser = QueryParser::for_index(&self.index, default_fields);