SHA256
1
0
This commit is contained in:
Alex Wied
2026-06-09 20:09:16 -04:00
parent b53a3ad1e0
commit f1abcb56ea
3 changed files with 51 additions and 98 deletions
+23 -20
View File
@@ -19,12 +19,10 @@ use ldp::reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use ldp::traverse::Traverse; use ldp::traverse::Traverse;
use ldp::{RdfSource, RdfSourceUpdateResponse, ResourceRequestBuilder, SerializationOptions}; use ldp::{RdfSource, RdfSourceUpdateResponse, ResourceRequestBuilder, SerializationOptions};
use oxigraph::io::RdfFormat; use oxigraph::io::RdfFormat;
use oxigraph::model::vocab::rdf; use oxigraph::model::vocab::{rdf, rdfs};
use oxigraph::model::{BaseDirection, Dataset, NamedNode, Quad, Term, TermRef}; use oxigraph::model::{BaseDirection, Dataset, NamedNode, Quad, Term, TermRef};
use tracing::{debug, error}; use tracing::{debug, error};
const ANNOTATED_IRI_TYPE: u64 = 0;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub(crate) enum Message { pub(crate) enum Message {
None, None,
@@ -106,25 +104,26 @@ impl Publisher {
.build() .build()
.expect("Failed to build search index"); .expect("Failed to build search index");
index.remove_all_of_type(ANNOTATED_IRI_TYPE); if let Some(id) = ontology.catalog_id(&rdf::PROPERTY.into_owned()) { index.remove_all_of_type(id); }
ontology.for_each_annotated_iri(|iri, label, comment| { if let Some(id) = ontology.catalog_id(&rdfs::CLASS.into_owned()) { index.remove_all_of_type(id); }
for (iri, info) in ontology.iri_info() {
if let Some(type_) = ontology.catalog_id(&info.type_) {
let mut document = doc!( let mut document = doc!(
Schema::type_field() => ANNOTATED_IRI_TYPE, Schema::type_field() => type_,
Schema::iri_field() => iri, Schema::iri_field() => iri.as_str(),
); );
if let Some(label) = label { if let Some(label) = &info.label {
document.add_text(Schema::field("label"), label.to_lowercase()); document.add_text(Schema::field("label"), label.to_lowercase());
} }
if let Some(comment) = comment { if let Some(comment) = &info.comment {
document.add_text(Schema::field("comment"), comment.to_lowercase()); document.add_text(Schema::field("comment"), comment.to_lowercase());
} }
index index.add(document).expect("Unable to add annotated IRI to search index");
.add(document) }
.expect("Unable to add annotated IRI to search index"); }
});
index.commit().expect("Unable to commit ontology to index"); index.commit().expect("Unable to commit ontology to index");
let mut abbreviated_datatypes = ontology let mut abbreviated_datatypes = ontology
@@ -181,11 +180,8 @@ impl Publisher {
.chain(Task::done(Message::CommitIndex)); .chain(Task::done(Message::CommitIndex));
} }
Message::IndexRdfSource(rdf_source) => { Message::IndexRdfSource(rdf_source) => {
let catalog_id = rdf_source for catalog_id in rdf_source.classes()
.classes() .filter_map(|class| self.ontology.catalog_id(&class.into_owned())) {
.filter_map(|class| self.ontology.catalog_id(&class.into_owned()))
.next();
if let Some(catalog_id) = catalog_id {
let mut document = SearchDocument::new(); let mut document = SearchDocument::new();
document.add_u64(Schema::type_field(), catalog_id); document.add_u64(Schema::type_field(), catalog_id);
document.add_text(Schema::iri_field(), rdf_source.origin()); document.add_text(Schema::iri_field(), rdf_source.origin());
@@ -322,9 +318,15 @@ impl Publisher {
} }
Message::QueryUpdated(new_query) => { Message::QueryUpdated(new_query) => {
if let Some(search_state) = &mut self.search_state { if let Some(search_state) = &mut self.search_state {
let type_ = match search_state.action {
SearchResultClickAction::Predicate(_) => self.ontology.catalog_id(&rdf::PROPERTY.into_owned()),
SearchResultClickAction::Object(_) => self.ontology.catalog_id(&rdfs::CLASS.into_owned()),
SearchResultClickAction::URLInput => None,
};
search_state.results = self search_state.results = self
.index .index
.query(0, new_query.as_str(), Schema::all_fields()) .query(type_, new_query.as_str(), Schema::all_fields())
.expect("Error encountered while querying index") .expect("Error encountered while querying index")
.iter() .iter()
.map(NamedNode::new_unchecked) .map(NamedNode::new_unchecked)
@@ -676,7 +678,8 @@ impl Publisher {
let header_text = self let header_text = self
.ontology .ontology
.info(result.as_ref()) .info(result.as_ref())
.map(|info| info.type_.to_string()) .and_then(|info| self.ontology.info(info.type_.as_ref()))
.and_then(|info| info.label.clone())
.unwrap_or("Unknown".to_string()); .unwrap_or("Unknown".to_string());
button(text(header_text)) button(text(header_text))
+6 -61
View File
@@ -8,7 +8,6 @@ use oxigraph::model::{
}; };
use oxigraph::sparql::{QueryResults, SparqlEvaluator}; use oxigraph::sparql::{QueryResults, SparqlEvaluator};
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::Display;
const PREFIXES: &[(&str, &str)] = &[ const PREFIXES: &[(&str, &str)] = &[
("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
@@ -170,14 +169,7 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only {
query.on_queryable_dataset(dataset).execute().unwrap() query.on_queryable_dataset(dataset).execute().unwrap()
{ {
for solution in solutions.filter_map(Result::ok) { for solution in solutions.filter_map(Result::ok) {
let resource_type = solution.get("class").and_then(term_to_named_node).and_then( let type_ = solution.get("class").and_then(term_to_named_node);
|class| match class.as_ref() {
rdf::PROPERTY => Some(ResourceType::Property),
rdfs::CLASS => Some(ResourceType::Class),
_ => None,
},
);
let subject = solution.get("subject").and_then(term_to_named_node); let subject = solution.get("subject").and_then(term_to_named_node);
let label = solution.get("label").and_then(term_to_string); let label = solution.get("label").and_then(term_to_string);
let comment = solution.get("comment").and_then(term_to_string); let comment = solution.get("comment").and_then(term_to_string);
@@ -186,11 +178,9 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only {
.and_then(term_to_boolean) .and_then(term_to_boolean)
.unwrap_or(false); .unwrap_or(false);
if let Some(subject) = subject if let Some(subject) = subject && let Some(type_) = type_ {
&& let Some(type_) = resource_type
{
let info = IriInformation { let info = IriInformation {
type_, type_: type_.clone(),
label: label.map(String::from), label: label.map(String::from),
comment: comment.map(String::from), comment: comment.map(String::from),
read_only, read_only,
@@ -253,25 +243,9 @@ SELECT DISTINCT ?class ?subject ?label ?comment ?read_only {
} }
} }
/// Classification of an ontology IRI as either an RDF property or an RDFS class.
#[derive(Clone, Debug)]
pub enum ResourceType {
    /// The IRI was declared as a property; displayed as `Property`.
    Property,
    /// The IRI was declared as a class; displayed as `Class`.
    Class,
}

impl Display for ResourceType {
    /// Writes the variant's name (`Property` or `Class`) to the formatter.
    ///
    /// The name is a static string slice written directly, so formatting does
    /// not allocate. Width/fill flags on the outer format spec are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            ResourceType::Property => "Property",
            ResourceType::Class => "Class",
        };
        f.write_str(name)
    }
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct IriInformation { pub struct IriInformation {
pub type_: ResourceType, pub type_: NamedNode,
pub label: Option<String>, pub label: Option<String>,
pub comment: Option<String>, pub comment: Option<String>,
pub read_only: bool, pub read_only: bool,
@@ -358,38 +332,9 @@ impl Ontology {
self.catalog_ids.get(class).copied() self.catalog_ids.get(class).copied()
} }
pub fn for_each_annotated_iri<F>(&self, f: F) pub fn iri_info(&self) -> &HashMap<NamedNode, IriInformation>
where
F: Fn(&str, Option<&str>, Option<&str>),
{ {
let query = SparqlEvaluator::new() &self.iri_info
.parse_query(
r"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?subject ?label ?comment WHERE {
OPTIONAL {
?subject rdfs:label ?label .
FILTER (LANG(?label) = 'en' || LANG(?label) = '')
}
OPTIONAL {
?subject rdfs:comment ?comment .
FILTER (LANG(?comment) = 'en' || LANG(?comment) = '')
}
}",
)
.expect("Unable to parse annotation query");
if let QueryResults::Solutions(solutions) =
query.on_queryable_dataset(&self.dataset).execute().unwrap()
{
for solution in solutions.filter_map(Result::ok) {
let label = solution.get("label").and_then(term_to_string);
let comment = solution.get("comment").and_then(term_to_string);
if let Some(Term::NamedNode(subject)) = solution.get("subject") {
f(subject.as_str(), label.as_deref(), comment.as_deref());
}
}
}
} }
pub fn datatypes(&self) -> impl Iterator<Item = NamedNodeRef<'_>> { pub fn datatypes(&self) -> impl Iterator<Item = NamedNodeRef<'_>> {
+15 -10
View File
@@ -4,8 +4,8 @@ use crate::schema::Schema;
use std::path::PathBuf; use std::path::PathBuf;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::directory::{ManagedDirectory, MmapDirectory}; use tantivy::directory::{ManagedDirectory, MmapDirectory};
use tantivy::query::{BooleanQuery, Occur, QueryParser}; use tantivy::query::{BooleanQuery, Occur, QueryParser, TermQuery};
use tantivy::schema::{Field, Value}; use tantivy::schema::{Field, IndexRecordOption, Value};
use tantivy::tokenizer::{NgramTokenizer, TokenizerManager}; use tantivy::tokenizer::{NgramTokenizer, TokenizerManager};
use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term}; use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term};
@@ -79,19 +79,24 @@ impl SearchIndex {
pub fn query( pub fn query(
&self, &self,
_type_: u64, type_: Option<u64>,
user_query: &str, user_query: &str,
default_fields: Vec<Field>, default_fields: Vec<Field>,
) -> error::Result<Vec<String>> { ) -> error::Result<Vec<String>> {
//let doc_type_term = Term::from_field_u64(Schema::type_field(), type_);
//let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic));
let parser = QueryParser::for_index(&self.index, default_fields); let parser = QueryParser::for_index(&self.index, default_fields);
let (user_query, _) = parser.parse_query_lenient(user_query); let (user_query, _) = parser.parse_query_lenient(user_query);
let query = BooleanQuery::new(vec![
//(Occur::Must, doc_type_query), let mut subqueries = vec![
(Occur::Must, user_query), (Occur::Must, user_query)
]); ];
if let Some(type_) = type_ {
let doc_type_term = Term::from_field_u64(Schema::type_field(), type_);
let doc_type_query = Box::new(TermQuery::new(doc_type_term, IndexRecordOption::Basic));
subqueries.push((Occur::Must, doc_type_query));
}
let query = BooleanQuery::new(subqueries);
let searcher = self.reader.searcher(); let searcher = self.reader.searcher();
let results = searcher.search(&query, &TopDocs::with_limit(10000).order_by_score())?; let results = searcher.search(&query, &TopDocs::with_limit(10000).order_by_score())?;
let mut iris = vec![]; let mut iris = vec![];