From 7528ebbaa0145b09e6fa21467d5d9dc3ce3ed076 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 12 Feb 2026 14:16:28 -0800 Subject: [PATCH 01/27] - Added PubMed tracking fields to DatasetStatus table: columns to cache PubMed Id, PubMed search strategy, and user dismissal status - Updated panoramapublic.xml to be consistent with changes to DatasetStatus - Changed schema version from 25.003 to 25.004 --- .../panoramapublic-25.003-25.004.sql | 20 +++++++++++ .../resources/schemas/panoramapublic.xml | 9 +++++ .../panoramapublic/PanoramaPublicModule.java | 2 +- .../panoramapublic/model/DatasetStatus.java | 33 +++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql diff --git a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql new file mode 100644 index 00000000..dabd520c --- /dev/null +++ b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2026 LabKey Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- Add PubMed publication tracking columns to DatasetStatus +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPubMedId VARCHAR(255); +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PubMedSearchStrategy TEXT; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPubMed BOOLEAN DEFAULT FALSE; diff --git a/panoramapublic/resources/schemas/panoramapublic.xml b/panoramapublic/resources/schemas/panoramapublic.xml index 6b3acf79..49e00a14 100644 --- a/panoramapublic/resources/schemas/panoramapublic.xml +++ b/panoramapublic/resources/schemas/panoramapublic.xml @@ -864,6 +864,15 @@ + + Cached PubMed ID(s) found for this dataset + + + Search strategy that found the publication + + + User indicated the found publication is not related to their data + \ No newline at end of file diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java index cc5aca7e..51e41c3c 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java @@ -92,7 +92,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 25.003; + return 25.004; } @Override diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 717da27d..ea31c2cd 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -12,6 +12,9 @@ public class DatasetStatus extends DbEntity private Date _lastReminderDate; private Date _extensionRequestedDate; private Date _deletionRequestedDate; + private String _potentialPubMedId; + private String _pubMedSearchStrategy; + private Boolean _userDismissedPubMed; public int getExperimentAnnotationsId() { @@ -72,4 +75,34 @@ public 
boolean reminderSent() { return _lastReminderDate != null; } + + public String getPotentialPubMedId() + { + return _potentialPubMedId; + } + + public void setPotentialPubMedId(String potentialPubMedId) + { + _potentialPubMedId = potentialPubMedId; + } + + public String getPubMedSearchStrategy() + { + return _pubMedSearchStrategy; + } + + public void setPubMedSearchStrategy(String pubMedSearchStrategy) + { + _pubMedSearchStrategy = pubMedSearchStrategy; + } + + public Boolean getUserDismissedPubMed() + { + return _userDismissedPubMed; + } + + public void setUserDismissedPubMed(Boolean userDismissedPubMed) + { + _userDismissedPubMed = userDismissedPubMed; + } } From 6d38d1ae82e2c3adce0f7dc9b057b06e4d70076b Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 12 Feb 2026 17:31:45 -0800 Subject: [PATCH 02/27] Added NcbiPublicationSearchService.java --- .../ncbi/NcbiPublicationSearchService.java | 797 ++++++++++++++++++ 1 file changed, 797 insertions(+) create mode 100644 panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java new file mode 100644 index 00000000..59a7fcd9 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -0,0 +1,797 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.json.JSONArray; +import org.json.JSONObject; +import org.labkey.api.util.logging.LogHelper; +import org.labkey.panoramapublic.datacite.DataCiteService; +import org.labkey.panoramapublic.model.ExperimentAnnotations; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import 
java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Service for searching PubMed Central (PMC) and PubMed for publications associated with private Panorama Public datasets. + */ +public class NcbiPublicationSearchService +{ + private static final Logger LOG = LogHelper.getLogger(NcbiPublicationSearchService.class, "Search NCBI for publications associated with Panorama Public datasets"); + + // NCBI API endpoints + private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; + private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; + + // API parameters + private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec + private static final int MAX_RESULTS = 5; + private static final int TIMEOUT_MS = 10000; // 10 seconds + private static final String NCBI_EMAIL = "panoramapublic@proteinms.net"; + + // Title keyword extraction + private static final int MIN_KEYWORD_LENGTH = 5; + private static final int MAX_KEYWORDS = 5; + + // Stop words for title matching + private static final Set TITLE_STOP_WORDS = Set.of( + "analysis", "study", "using", "based", "data", "dataset", "proteomics", + "method", "methods", "approach", "application", "investigation", + "examination", "characterization", "identification", "quantification", + "comparison", "evaluation" + ); + + // Preprint indicators + private static final String[] PREPRINT_INDICATORS = { + "preprint", "biorxiv", "medrxiv", "chemrxiv", "arxiv", + "research square", "preprints.org" + }; + + /** + * Search for publications associated with the given experiment. 
+ * Search PubMed Central first, then fall back to PubMed if needed. + */ + public static NcbiPublicationSearchResult searchForPublication(@NotNull ExperimentAnnotations expAnnotations) + { + LOG.info("Starting publication search for experiment: " + expAnnotations.getId()); + + // Track PMC IDs found by each strategy + Map> pmcIdsByStrategy = new HashMap<>(); + + // Step 1: Search PMC by PX ID + if (!StringUtils.isBlank(expAnnotations.getPxid())) + { + LOG.info("Searching PMC by PX ID: " + expAnnotations.getPxid()); + List ids = searchPmc(quote(expAnnotations.getPxid())); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put("PMC_PX_ID", ids); + LOG.info("Found " + ids.size() + " PMC articles by PX ID"); + } + rateLimit(); + } + + // Step 2: Search PMC by Panorama URL + if (expAnnotations.getShortUrl() != null) + { + String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); + LOG.info("Searching PMC by Panorama URL: " + panoramaUrl); + List ids = searchPmc(quote(panoramaUrl)); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put("PMC_PanoramaURL", ids); + LOG.info("Found " + ids.size() + " PMC articles by Panorama URL"); + } + rateLimit(); + } + + // Step 3: Search PMC by DOI + if (!StringUtils.isBlank(expAnnotations.getDoi())) + { + String doiUrl = expAnnotations.getDoi().startsWith("http") + ? 
expAnnotations.getDoi() + : DataCiteService.toUrl(expAnnotations.getDoi()); + LOG.info("Searching PMC by DOI: " + doiUrl); + List ids = searchPmc(quote(doiUrl)); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put("PMC_DOI", ids); + LOG.info("Found " + ids.size() + " PMC articles by DOI"); + } + rateLimit(); + } + + // Accumulate unique PMC IDs and track which strategies found each + Set uniquePmcIds = new HashSet<>(); + Map> idToStrategies = new HashMap<>(); + + for (Map.Entry> entry : pmcIdsByStrategy.entrySet()) + { + String strategy = entry.getKey(); + for (String id : entry.getValue()) + { + uniquePmcIds.add(id); + idToStrategies.computeIfAbsent(id, k -> new ArrayList<>()).add(strategy); + } + } + + LOG.info("Total unique PMC IDs found: " + uniquePmcIds.size()); + + // Fetch and verify PMC articles + List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations); + + // Apply priority filtering + List filteredArticles = applyPriorityFiltering(pmcArticles); + + // If no PMC results, fall back to PubMed + if (filteredArticles.isEmpty()) + { + LOG.info("No PMC articles found, trying PubMed fallback"); + filteredArticles = searchPubMedFallback(expAnnotations); + } + + // Build and return result + if (filteredArticles.isEmpty()) + { + LOG.info("No publications found"); + return NcbiPublicationSearchResult.notFound(); + } + + LOG.info("Found " + filteredArticles.size() + " publication(s)"); + return buildSearchResult(filteredArticles); + } + + /** + * Search PubMed Central with the given query + */ + private static List searchPmc(String query) + { + return executeSearch(query, "pmc"); + } + + /** + * Search PubMed with the given query + */ + private static List searchPubMed(String query) + { + return executeSearch(query, "pubmed"); + } + + /** + * Execute search using NCBI ESearch API + */ + private static List executeSearch(String query, String database) + { + HttpURLConnection conn = null; + try + { + String encodedQuery = 
URLEncoder.encode(query, StandardCharsets.UTF_8.toString()); + String urlString = ESEARCH_URL + + "?db=" + database + + "&term=" + encodedQuery + + "&retmax=" + MAX_RESULTS + + "&retmode=json" + + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8.toString()); + + URL url = new URL(urlString); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setConnectTimeout(TIMEOUT_MS); + conn.setReadTimeout(TIMEOUT_MS); + + int responseCode = conn.getResponseCode(); + if (responseCode != 200) + { + LOG.warn("NCBI ESearch returned non-200 response: " + responseCode); + return Collections.emptyList(); + } + + BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); + StringBuilder response = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) + { + response.append(line); + } + reader.close(); + + // Parse JSON response + JSONObject jsonResponse = new JSONObject(response.toString()); + JSONObject esearchresult = jsonResponse.getJSONObject("esearchresult"); + JSONArray idlist = esearchresult.getJSONArray("idlist"); + + List ids = new ArrayList<>(); + for (int i = 0; i < idlist.length(); i++) + { + ids.add(idlist.getString(i)); + } + return ids; + } + catch (Exception e) + { + LOG.error("Error searching " + database + " with query: " + query, e); + return Collections.emptyList(); + } + finally + { + if (conn != null) conn.disconnect(); + } + } + + /** + * Fetch metadata for PMC articles using ESummary API + */ + private static Map fetchPmcMetadata(Collection pmcIds) + { + return fetchMetadata(pmcIds, "pmc"); + } + + /** + * Fetch metadata for PubMed articles using ESummary API + */ + private static Map fetchPubMedMetadata(Collection pmids) + { + return fetchMetadata(pmids, "pubmed"); + } + + /** + * Fetch metadata using NCBI ESummary API (batch request) + */ + private static Map fetchMetadata(Collection ids, String database) + { + if (ids.isEmpty()) return 
Collections.emptyMap(); + + HttpURLConnection conn = null; + try + { + String idString = String.join(",", ids); + String urlString = ESUMMARY_URL + + "?db=" + database + + "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8.toString()) + + "&retmode=json" + + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8.toString()); + + URL url = new URL(urlString); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setConnectTimeout(TIMEOUT_MS); + conn.setReadTimeout(TIMEOUT_MS); + + int responseCode = conn.getResponseCode(); + if (responseCode != 200) + { + LOG.warn("NCBI ESummary returned non-200 response: " + responseCode); + return Collections.emptyMap(); + } + + BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); + StringBuilder response = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) + { + response.append(line); + } + reader.close(); + + // Parse JSON response + JSONObject jsonResponse = new JSONObject(response.toString()); + JSONObject result = jsonResponse.optJSONObject("result"); + if (result == null) return Collections.emptyMap(); + + // Extract metadata for each ID + Map metadata = new HashMap<>(); + for (String id : ids) + { + JSONObject articleData = result.optJSONObject(id); + if (articleData != null) + { + metadata.put(id, articleData); + } + } + return metadata; + } + catch (Exception e) + { + LOG.error("Error fetching " + database + " metadata for IDs: " + ids, e); + return Collections.emptyMap(); + } + finally + { + if (conn != null) conn.disconnect(); + } + } + + /** + * Fetch and verify PMC articles (filter preprints, check author/title matches) + */ + private static List fetchAndVerifyPmcArticles( + Set pmcIds, + Map> idToStrategies, + ExperimentAnnotations expAnnotations) + { + if (pmcIds.isEmpty()) return Collections.emptyList(); + + // Fetch all metadata in batch + Map metadata = fetchPmcMetadata(pmcIds); + + List 
articles = new ArrayList<>(); + String firstName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getFirstName() : null; + String lastName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getLastName() : null; + + for (String pmcId : pmcIds) + { + JSONObject articleData = metadata.get(pmcId); + if (articleData == null) continue; + + // Filter out preprints + if (isPreprint(articleData)) + { + String source = articleData.optString("source", ""); + String journal = articleData.optString("fulljournalname", ""); + LOG.info("Excluded preprint PMC" + pmcId + ": source=\"" + source + "\", journal=\"" + journal + "\""); + continue; + } + + // Check author and title matches + boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); + boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); + + List matchedBy = new ArrayList<>(); + if (authorMatch) matchedBy.add("Author"); + if (titleMatch) matchedBy.add("Title"); + + // Extract PMID from PMC metadata + String pmid = extractPmidFromPmcMetadata(articleData, pmcId); + + // Get strategies that found this article + List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); + + ArticleMatch article = new ArticleMatch( + pmid, + String.join(", ", strategies), + String.join(", ", matchedBy) + ); + articles.add(article); + + LOG.info("PMC" + pmcId + " -> PMID " + pmid + " | Found by: " + article.getFoundBy() + + " | Matched: " + article.getMatchedBy()); + } + + return articles; + } + + /** + * Apply priority filtering to narrow down results + */ + private static List applyPriorityFiltering(List articles) + { + if (articles.size() <= 1) return articles; + + // Priority 1: Filter to articles found by multiple data IDs (most reliable) + List multipleIds = articles.stream() + .filter(a -> a.getFoundBy().split(", ").length >= 2) + .collect(Collectors.toList()); + + if (!multipleIds.isEmpty()) + { + 
LOG.info("Filtered to " + multipleIds.size() + " article(s) found by multiple IDs"); + articles = multipleIds; + } + + if (articles.size() <= 1) return articles; + + // Priority 2: Filter to articles with both Author AND Title match + List bothMatches = articles.stream() + .filter(a -> "Author, Title".equals(a.getMatchedBy())) + .collect(Collectors.toList()); + + if (!bothMatches.isEmpty()) + { + LOG.info("Filtered to " + bothMatches.size() + " article(s) with both Author and Title match"); + return bothMatches; + } + + return articles; + } + + /** + * Fall back to PubMed search if PMC finds nothing + */ + private static List searchPubMedFallback(ExperimentAnnotations expAnnotations) + { + String firstName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getFirstName() : null; + String lastName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getLastName() : null; + String title = expAnnotations.getTitle(); + + if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName) || StringUtils.isBlank(title)) + { + LOG.info("Cannot perform PubMed fallback - missing author or title information"); + return Collections.emptyList(); + } + + // Search PubMed: "LastName FirstName[Author] AND Title NOT preprint[Publication Type]" + String query = String.format("%s %s[Author] AND %s NOT preprint[Publication Type]", + lastName, firstName, title); + + LOG.info("PubMed fallback query: " + query); + List pmids = searchPubMed(query); + + if (pmids.isEmpty()) + { + LOG.info("PubMed fallback found no results"); + return Collections.emptyList(); + } + + LOG.info("PubMed fallback found " + pmids.size() + " result(s), verifying..."); + + // Fetch metadata and verify + Map metadata = fetchPubMedMetadata(pmids); + + List articles = new ArrayList<>(); + for (String pmid : pmids) + { + JSONObject articleData = metadata.get(pmid); + if (articleData == null) continue; + + // Filter out preprints + if 
(isPreprint(articleData)) + { + LOG.info("Excluded preprint PMID " + pmid); + continue; + } + + // Verify both author AND title match + boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); + boolean titleMatch = checkTitleMatch(articleData, title); + + // Only accept if BOTH match + if (authorMatch && titleMatch) + { + ArticleMatch article = new ArticleMatch(pmid, "PubMed_Author+Title", "Author, Title"); + articles.add(article); + LOG.info("PMID " + pmid + " verified with both Author and Title match"); + } + } + + LOG.info("Verified " + articles.size() + " of " + pmids.size() + " PubMed results"); + return articles; + } + + /** + * Check if article is a preprint + */ + private static boolean isPreprint(JSONObject metadata) + { + String source = metadata.optString("source", "").toLowerCase(); + String journal = metadata.optString("fulljournalname", "").toLowerCase(); + + for (String indicator : PREPRINT_INDICATORS) + { + if (source.contains(indicator) || journal.contains(indicator)) + { + return true; + } + } + return false; + } + + /** + * Check if article matches the author (submitter) + */ + private static boolean checkAuthorMatch(JSONObject metadata, String firstName, String lastName) + { + if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName)) + { + return false; + } + + JSONArray authors = metadata.optJSONArray("authors"); + if (authors == null || authors.length() == 0) + { + return false; + } + + String firstInitial = firstName.substring(0, 1).toUpperCase(); + String lastNameLower = lastName.toLowerCase(); + + for (int i = 0; i < authors.length(); i++) + { + JSONObject author = authors.optJSONObject(i); + if (author == null) continue; + + String authorName = author.optString("name", "").toLowerCase(); + + // Match format: "LastName FirstInitial" or "LastName F" + if (authorName.startsWith(lastNameLower)) + { + String afterLastName = authorName.substring(lastNameLower.length()).trim(); + if 
(afterLastName.startsWith(firstInitial.toLowerCase())) + { + return true; + } + } + } + + return false; + } + + /** + * Check if article title matches the dataset title + */ + private static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) + { + if (StringUtils.isBlank(datasetTitle)) + { + return false; + } + + // Try sorttitle first, then title + String articleTitle = normalizeTitle(metadata.optString("sorttitle")); + if (articleTitle.isEmpty()) + { + articleTitle = normalizeTitle(metadata.optString("title")); + } + + if (articleTitle.isEmpty()) + { + return false; + } + + String normalizedDatasetTitle = normalizeTitle(datasetTitle); + + // Try exact match first + if (articleTitle.equals(normalizedDatasetTitle)) + { + return true; + } + + // Fall back to keyword matching + List keywords = extractTitleKeywords(datasetTitle); + if (keywords.isEmpty()) + { + return false; + } + + // All keywords must be present in article title + for (String keyword : keywords) + { + if (!articleTitle.contains(keyword.toLowerCase())) + { + return false; + } + } + + return true; + } + + /** + * Normalize title for comparison (lowercase, remove punctuation, normalize whitespace) + */ + private static String normalizeTitle(String title) + { + if (title == null) return ""; + + return title.toLowerCase() + .replaceAll("[^\\w\\s]", "") // Remove punctuation + .replaceAll("\\s+", " ") // Normalize whitespace + .trim(); + } + + /** + * Extract meaningful keywords from title for matching + */ + private static List extractTitleKeywords(String title) + { + if (StringUtils.isBlank(title)) + { + return Collections.emptyList(); + } + + // Extract words of minimum length + String[] words = title.toLowerCase().split("\\s+"); + List keywords = new ArrayList<>(); + + for (String word : words) + { + // Remove non-alphanumeric characters + word = word.replaceAll("[^a-z0-9]", ""); + + // Check length and exclude stop words + if (word.length() >= MIN_KEYWORD_LENGTH && 
!TITLE_STOP_WORDS.contains(word)) + { + keywords.add(word); + } + } + + // Return top N keywords + return keywords.stream() + .limit(MAX_KEYWORDS) + .collect(Collectors.toList()); + } + + /** + * Extract PMID from PMC metadata + */ + private static String extractPmidFromPmcMetadata(JSONObject pmcMetadata, String pmcId) + { + // PMC articles contain PMID in articleids array + JSONArray articleIds = pmcMetadata.optJSONArray("articleids"); + if (articleIds != null) + { + for (int i = 0; i < articleIds.length(); i++) + { + JSONObject idObj = articleIds.optJSONObject(i); + if (idObj != null && "pmid".equals(idObj.optString("idtype"))) + { + return idObj.optString("value"); + } + } + } + + // Fallback: use PMC ID if no PMID found + return "PMC" + pmcId; + } + + /** + * Build search result from filtered articles + */ + private static NcbiPublicationSearchResult buildSearchResult(List articles) + { + if (articles.isEmpty()) + { + return NcbiPublicationSearchResult.notFound(); + } + + List pmids = articles.stream() + .map(ArticleMatch::getPmid) + .collect(Collectors.toList()); + + String strategy; + if (articles.size() == 1) + { + ArticleMatch article = articles.get(0); + String matchInfo = article.getMatchedBy().isEmpty() ? "" : "(" + article.getMatchedBy() + ")"; + strategy = article.getFoundBy() + matchInfo; + } + else + { + // Multiple articles: format each on separate line + strategy = articles.stream() + .map(a -> { + String matchInfo = a.getMatchedBy().isEmpty() ? 
"" : "(" + a.getMatchedBy() + ")"; + return a.getPmid() + ": " + a.getFoundBy() + matchInfo; + }) + .collect(Collectors.joining("\n")); + } + + return NcbiPublicationSearchResult.found(pmids, strategy, articles); + } + + /** + * Rate limiting: wait 400ms between API requests + */ + private static void rateLimit() + { + try + { + Thread.sleep(RATE_LIMIT_DELAY_MS); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + } + } + + /** + * Wrap string in quotes for exact match search + */ + private static String quote(String str) + { + return "\"" + str + "\""; + } + + /** + * Result of a publication search + */ + public static class NcbiPublicationSearchResult + { + private final List _pmids; + private final String _searchStrategy; + private final boolean _found; + private final List _articles; + + private NcbiPublicationSearchResult(List pmids, String searchStrategy, boolean found, List articles) + { + _pmids = pmids != null ? pmids : Collections.emptyList(); + _searchStrategy = searchStrategy; + _found = found; + _articles = articles != null ? 
articles : Collections.emptyList(); + } + + public static NcbiPublicationSearchResult found(List pmids, String strategy, List articles) + { + return new NcbiPublicationSearchResult(pmids, strategy, true, articles); + } + + public static NcbiPublicationSearchResult notFound() + { + return new NcbiPublicationSearchResult(null, null, false, null); + } + + public boolean isFound() + { + return _found; + } + + public List getPmids() + { + return _pmids; + } + + public String getSearchStrategy() + { + return _searchStrategy; + } + + public List getArticles() + { + return _articles; + } + + public String getPmidsAsString() + { + return String.join(", ", _pmids); + } + } + + /** + * Information about a matched article + */ + public static class ArticleMatch + { + private final String _pmid; + private final String _foundBy; // e.g., "PMC_PX_ID, PMC_DOI" + private final String _matchedBy; // e.g., "Author, Title" + + public ArticleMatch(String pmid, String foundBy, String matchedBy) + { + _pmid = pmid; + _foundBy = foundBy; + _matchedBy = matchedBy; + } + + public String getPmid() + { + return _pmid; + } + + public String getFoundBy() + { + return _foundBy; + } + + public String getMatchedBy() + { + return _matchedBy; + } + } +} From 94e218be11bd294ddb6ddf016c35233f850345cc Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 12 Feb 2026 19:07:43 -0800 Subject: [PATCH 03/27] - Added enablePublicationCheck to PrivateDataReminderSettings - Added checkbox to enable publication check in the private data reminder settings form - Added "Enable Publication Check" checkbox on sendPrivateDataRemindersForm.jsp that can override the value in the saved settings. 
- Added DismissPubMedSuggestionAction - Added CheckPubMedForDatasetAction to perform publication check on one dataset - New method added to PanoramaPublicNotification for sending message about published paper match - Updated PrivateDataReminderJob to check for publications --- .../PanoramaPublicController.java | 162 +++++++++++++++++- .../PanoramaPublicNotification.java | 78 ++++++--- .../message/PrivateDataReminderSettings.java | 20 +++ .../pipeline/PrivateDataReminderJob.java | 138 ++++++++++++++- .../view/privateDataRemindersSettingsForm.jsp | 13 ++ .../view/sendPrivateDataRemindersForm.jsp | 9 + 6 files changed, 391 insertions(+), 29 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 1a278a0b..29b2ebae 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -155,6 +155,7 @@ import org.labkey.panoramapublic.datacite.DoiMetadata; import org.labkey.panoramapublic.message.PrivateDataMessageScheduler; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.model.CatalogEntry; import org.labkey.panoramapublic.model.DataLicense; import org.labkey.panoramapublic.model.DatasetStatus; @@ -10135,6 +10136,7 @@ public ModelAndView getView(PrivateDataReminderSettingsForm form, boolean reshow form.setDelayUntilFirstReminder(settings.getDelayUntilFirstReminder()); form.setReminderFrequency(settings.getReminderFrequency()); form.setExtensionLength(settings.getExtensionLength()); + form.setEnablePublicationCheck(settings.isEnablePublicationCheck()); } VBox view = new VBox(); @@ -10153,6 +10155,7 @@ public boolean handlePost(PrivateDataReminderSettingsForm form, BindException er 
settings.setDelayUntilFirstReminder(form.getDelayUntilFirstReminder()); settings.setReminderFrequency(form.getReminderFrequency()); settings.setExtensionLength(form.getExtensionLength()); + settings.setEnablePublicationCheck(form.isEnablePublicationCheck()); PrivateDataReminderSettings.save(settings); PrivateDataMessageScheduler.getInstance().initialize(settings.isEnableReminders()); @@ -10190,6 +10193,7 @@ public static class PrivateDataReminderSettingsForm private Integer _extensionLength; private Integer _reminderFrequency; private Integer _delayUntilFirstReminder; + private boolean _enablePublicationCheck; public boolean isEnabled() { @@ -10240,6 +10244,16 @@ public void setDelayUntilFirstReminder(Integer delayUntilFirstReminder) { _delayUntilFirstReminder = delayUntilFirstReminder; } + + public boolean isEnablePublicationCheck() + { + return _enablePublicationCheck; + } + + public void setEnablePublicationCheck(boolean enablePublicationCheck) + { + _enablePublicationCheck = enablePublicationCheck; + } } @RequiresPermission(AdminOperationsPermission.class) @@ -10258,6 +10272,13 @@ public ModelAndView getView(PrivateDataSendReminderForm form, boolean reshow, Bi return new SimpleErrorView(errors, true); } + // Initialize form with current settings on first view + if (!reshow) + { + PrivateDataReminderSettings settings = PrivateDataReminderSettings.get(); + form.setCheckPublications(settings.isEnablePublicationCheck()); + } + QuerySettings qSettings = new QuerySettings(getViewContext(), PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); @@ -10290,7 +10311,8 @@ public boolean handlePost(PrivateDataSendReminderForm form, BindException errors PipelineService.get().getPipelineRootSetting(getContainer()), JournalManager.getJournal(getContainer()), form.getSelectedExperimentIds(), - form.getTestMode()); + form.getTestMode(), + 
form.isCheckPublications()); PipelineService.get().queueJob(job); return true; } @@ -10313,6 +10335,7 @@ public void addNavTrail(NavTree root) public static class PrivateDataSendReminderForm { private boolean _testMode; + private boolean _checkPublications; private String _selectedIds; private String _dataRegionName = null; @@ -10326,6 +10349,16 @@ public void setTestMode(boolean testMode) _testMode = testMode; } + public boolean isCheckPublications() + { + return _checkPublications; + } + + public void setCheckPublications(boolean checkPublications) + { + _checkPublications = checkPublications; + } + public String getSelectedIds() { return _selectedIds; @@ -10551,6 +10584,133 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } } + @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) + public class DismissPubMedSuggestionAction extends UpdateDatasetStatusAction + { + @Override + protected String getConfirmViewTitle() + { + return "Dismiss PubMed Publication Suggestion"; + } + + @Override + protected String getConfirmViewMessage() + { + String message = "You are dismissing the publication suggestion for your data on Panorama Public at " + _exptAnnotations.getShortUrl().renderShortURL(); + if (_datasetStatus != null && !StringUtils.isBlank(_datasetStatus.getPotentialPubMedId())) + { + message += ". 
We will no longer suggest PubMed ID " + _datasetStatus.getPotentialPubMedId() + " for this dataset"; + } + return message; + } + + @Override + protected void doValidationForAction(Errors errors) + { + if (_datasetStatus == null || StringUtils.isBlank(_datasetStatus.getPotentialPubMedId())) + { + errors.reject(ERROR_MSG, "No publication suggestion exists for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL()); + } + else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPubMed())) + { + errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() + + " has already been dismissed"); + } + } + + @Override + protected void updateDatasetStatus(DatasetStatus datasetStatus) + { + datasetStatus.setUserDismissedPubMed(true); + } + + @Override + protected void postNotification() + { + // No notification needed for dismissal + } + + @Override + public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) + { + setTitle("Publication Suggestion Dismissed"); + return new HtmlView(DIV("The publication suggestion has been dismissed for the data at " + _exptAnnotations.getShortUrl().renderShortURL(), + DIV("We will no longer suggest this publication for your dataset."), + BR(), + DIV( + LinkBuilder.labkeyLink("Data Folder", PageFlowUtil.urlProvider(ProjectUrls.class).getBeginURL(_exptAnnotations.getContainer())) + ) + )); + } + } + + @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) + public static class CheckPubMedForDatasetAction extends ReadOnlyApiAction + { + @Override + public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Exception + { + ApiSimpleResponse response = new ApiSimpleResponse(); + + ExperimentAnnotations exptAnnotations = getValidExperimentAnnotations(shortUrlForm, errors); + if (exptAnnotations == null) + { + errors.reject(ERROR_MSG, "Unable to find experiment for the provided short URL"); + return null; + } 
+ + try + { + // Perform the publication search + NcbiPublicationSearchService.NcbiPublicationSearchResult searchResult = + NcbiPublicationSearchService.searchForPublication(exptAnnotations); + + response.put("success", true); + response.put("publicationFound", searchResult.isFound()); + + if (searchResult.isFound()) + { + String pubMedIds = searchResult.getPmidsAsString(); + response.put("pubMedId", pubMedIds); + response.put("searchStrategy", searchResult.getSearchStrategy()); + response.put("pubMedUrl", "https://pubmed.ncbi.nlm.nih.gov/" + searchResult.getPmids().get(0)); + + // Update DatasetStatus with the search result + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); + if (datasetStatus == null) + { + datasetStatus = new DatasetStatus(); + datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); + datasetStatus.setPotentialPubMedId(pubMedIds); + datasetStatus.setPubMedSearchStrategy(searchResult.getSearchStrategy()); + DatasetStatusManager.save(datasetStatus, getUser()); + } + else + { + datasetStatus.setPotentialPubMedId(pubMedIds); + datasetStatus.setPubMedSearchStrategy(searchResult.getSearchStrategy()); + datasetStatus.setUserDismissedPubMed(false); // Reset dismissal flag + DatasetStatusManager.update(datasetStatus, getUser()); + } + + response.put("message", "Publication found: PubMed ID " + pubMedIds); + } + else + { + response.put("message", "No publication found for this dataset"); + } + } + catch (Exception e) + { + LOG.error("Error searching for publication for experiment " + exptAnnotations.getId(), e); + errors.reject(ERROR_MSG, "Error searching for publication: " + e.getMessage()); + return null; + } + + return response; + } + } + public static class ShortUrlForm { private String _shortUrlEntityId; diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index b8633b12..b1310446 100644 
--- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -361,16 +361,20 @@ public static void postDataDeletionRequestMessage(@NotNull Journal journal, @Not public static void postPrivateDataReminderMessage(@NotNull Journal journal, @NotNull JournalSubmission js, @NotNull ExperimentAnnotations expAnnotations, @NotNull User submitter, @NotNull User messagePoster, List notifyUsers, - @NotNull Announcement announcement, @NotNull Container announcementsContainer, @NotNull User journalAdmin) + @NotNull Announcement announcement, @NotNull Container announcementsContainer, @NotNull User journalAdmin, + boolean publicationFound, @Nullable String pubMedId) { - String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin); - String title = "Action Required: Status Update for Your Private Data on Panorama Public"; + String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, publicationFound, pubMedId); + String title = publicationFound + ? "Congratulations! 
We Found a Publication Associated with Your Data on Panorama Public" + : "Action Required: Status Update for Your Private Data on Panorama Public"; postNotificationFullTitle(journal, js.getJournalExperiment(), message, messagePoster, title, AnnouncementService.StatusOption.Closed, notifyUsers); } public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations exptAnnotations, @NotNull User submitter, @NotNull JournalSubmission js,@NotNull Announcement announcement, - @NotNull Container announcementContainer, @NotNull User journalAdmin) + @NotNull Container announcementContainer, @NotNull User journalAdmin, + boolean publicationFound, @Nullable String pubMedId) { String shortUrl = exptAnnotations.getShortUrl().renderShortURL(); String makePublicLink = PanoramaPublicController.getMakePublicUrl(exptAnnotations.getId(), exptAnnotations.getContainer()).getURIString(); @@ -386,32 +390,60 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations ActionURL requestExtensionUrl = new ActionURL(PanoramaPublicController.RequestExtensionAction.class, exptAnnotations.getContainer()) .addParameter("shortUrlEntityId", shortUrlEntityId); - ActionURL requesDeletionUrl = new ActionURL(PanoramaPublicController.RequestDeletionAction.class, exptAnnotations.getContainer()) + ActionURL requestDeletionUrl = new ActionURL(PanoramaPublicController.RequestDeletionAction.class, exptAnnotations.getContainer()) .addParameter("shortUrlEntityId",shortUrlEntityId); + ActionURL dismissPubMedUrl = new ActionURL(PanoramaPublicController.DismissPubMedSuggestionAction.class, exptAnnotations.getContainer()) + .addParameter("shortUrlEntityId", shortUrlEntityId); ExperimentAnnotations sourceExperiment = ExperimentAnnotationsManager.get(exptAnnotations.getSourceExperimentId()); StringBuilder message = new StringBuilder(); - message.append("Dear ").append(getUserName(submitter)).append(",").append(NL2) - .append("We are reaching out regarding your data on Panorama 
Public (").append(shortUrl).append("), which has been private since ") - .append(dateString).append(".") - .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) - .append(NL2).append(bold("Is the paper associated with this work already published?")) - .append(NL).append("- If yes: Please make your data public by clicking the \"Make Public\" button in your folder or by clicking this link: ") - .append(bold(link("Make Data Public", makePublicLink))) - .append(". This helps ensure that your valuable research is easily accessible to the community.") - .append(NL).append("- If not: You have a couple of options:") - .append(NL).append(" - ").append(bold("Request an Extension")).append(" - If your paper is still under review, or you need additional time, please let us know by clicking ") - .append(bold(link("Request Extension", requestExtensionUrl.getURIString()))).append(".") - .append(NL).append(" - ").append(bold("Delete from Panorama Public")).append(" - If you no longer wish to host your data on Panorama Public, please click ") - .append(bold(link("Request Deletion", requesDeletionUrl.getURIString()))).append(". ") - .append("We will remove your data from Panorama Public."); - if (sourceExperiment != null) + message.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); + + if (publicationFound && !StringUtils.isBlank(pubMedId)) + { + // Message variant when a publication was found + String pubMedUrl = "https://pubmed.ncbi.nlm.nih.gov/" + pubMedId; + message.append("Great news! 
We found a publication that appears to be associated with your data on Panorama Public (") + .append(shortUrl).append("), which has been private since ").append(dateString).append(".") + .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) + .append(NL2).append(bold("Publication Found:")).append(" ").append(link("PubMed ID " + pubMedId, pubMedUrl)) + .append(NL2).append("Since your work has been published, we encourage you to make your data public so the research community can access it alongside your publication. ") + .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") + .append(bold(link("Make Data Public", makePublicLink))).append(".") + .append(NL2).append(bold("If this publication is not associated with your data:")) + .append(NL).append("Please let us know by ").append(bold(link("clicking here", dismissPubMedUrl.getURIString()))) + .append(", and we will stop suggesting this publication for your dataset.") + .append(NL2).append(bold("Not ready to make your data public yet?")) + .append(NL).append("If you need more time, you can:") + .append(NL).append(" - ").append(bold(link("Request an Extension", requestExtensionUrl.getURIString()))) + .append(" - This will give you additional time before we send another reminder.") + .append(NL).append(" - ").append(bold(link("Request Deletion", requestDeletionUrl.getURIString()))) + .append(" - If you no longer wish to host your data on Panorama Public."); + } + else { - message.append(" However, your source folder (") - .append(getContainerLink(sourceExperiment.getContainer())) - .append(") will remain intact, allowing you to resubmit your data in the future if you wish."); + // Original message variant when no publication was found + message.append("We are reaching out regarding your data on Panorama Public (").append(shortUrl).append("), which has been private since ") + .append(dateString).append(".") + 
.append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) + .append(NL2).append(bold("Is the paper associated with this work already published?")) + .append(NL).append("- If yes: Please make your data public by clicking the \"Make Public\" button in your folder or by clicking this link: ") + .append(bold(link("Make Data Public", makePublicLink))) + .append(". This helps ensure that your valuable research is easily accessible to the community.") + .append(NL).append("- If not: You have a couple of options:") + .append(NL).append(" - ").append(bold("Request an Extension")).append(" - If your paper is still under review, or you need additional time, please let us know by clicking ") + .append(bold(link("Request Extension", requestExtensionUrl.getURIString()))).append(".") + .append(NL).append(" - ").append(bold("Delete from Panorama Public")).append(" - If you no longer wish to host your data on Panorama Public, please click ") + .append(bold(link("Request Deletion", requestDeletionUrl.getURIString()))).append(". 
") + .append("We will remove your data from Panorama Public."); + if (sourceExperiment != null) + { + message.append(" However, your source folder (") + .append(getContainerLink(sourceExperiment.getContainer())) + .append(") will remain intact, allowing you to resubmit your data in the future if you wish."); + } } message.append(NL2).append("If you have any questions or need further assistance, please do not hesitate to respond to this message by ") diff --git a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java index ee45663c..b4cc557e 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java +++ b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java @@ -24,12 +24,14 @@ public class PrivateDataReminderSettings public static final String PROP_DELAY_UNTIL_FIRST_REMINDER = "Delay until first reminder (months)"; public static final String PROP_REMINDER_FREQUENCY = "Reminder frequency (months)"; public static final String PROP_EXTENSION_LENGTH = "Extension duration (months)"; + public static final String PROP_ENABLE_PUBLICATION_CHECK = "Enable NCBI publication check"; private static final boolean DEFAULT_ENABLE_REMINDERS = false; public static final String DEFAULT_REMINDER_TIME = "8:00 AM"; private static final int DEFAULT_DELAY_UNTIL_FIRST_REMINDER = 12; // Send the first reminder after the data has been private for a year. private static final int DEFAULT_REMINDER_FREQUENCY = 1; // Send reminders once a month, unless extension or deletion was requested. private static final int DEFAULT_EXTENSION_LENGTH = 6; // Private status of a dataset can be extended by 6 months. 
+ private static final boolean DEFAULT_ENABLE_PUBLICATION_CHECK = false; public static final String DATE_FORMAT_PATTERN = "MMMM d, yyyy"; public static final String REMINDER_TIME_FORMAT = "h:mm a"; @@ -40,6 +42,7 @@ public class PrivateDataReminderSettings private int _delayUntilFirstReminder; private int _reminderFrequency; private int _extensionLength; + private boolean _enablePublicationCheck; public static PrivateDataReminderSettings get() { @@ -70,6 +73,11 @@ public static PrivateDataReminderSettings get() LocalTime reminderTime = tryParseReminderTime(settingsMap.get(PROP_REMINDER_TIME), DEFAULT_REMINDER_TIME); settings.setReminderTime(reminderTime); + + boolean enablePublicationCheck = settingsMap.get(PROP_ENABLE_PUBLICATION_CHECK) == null + ? DEFAULT_ENABLE_PUBLICATION_CHECK + : Boolean.valueOf(settingsMap.get(PROP_ENABLE_PUBLICATION_CHECK)); + settings.setEnablePublicationCheck(enablePublicationCheck); } else { @@ -78,6 +86,7 @@ public static PrivateDataReminderSettings get() settings.setReminderFrequency(DEFAULT_REMINDER_FREQUENCY); settings.setExtensionLength(DEFAULT_EXTENSION_LENGTH); settings.setReminderTime(parseReminderTime(DEFAULT_REMINDER_TIME)); + settings.setEnablePublicationCheck(DEFAULT_ENABLE_PUBLICATION_CHECK); } return settings; @@ -112,6 +121,7 @@ public static void save(PrivateDataReminderSettings settings) settingsMap.put(PROP_REMINDER_FREQUENCY, String.valueOf(settings.getReminderFrequency())); settingsMap.put(PROP_EXTENSION_LENGTH, String.valueOf(settings.getExtensionLength())); settingsMap.put(PROP_REMINDER_TIME, settings.getReminderTimeFormatted()); + settingsMap.put(PROP_ENABLE_PUBLICATION_CHECK, String.valueOf(settings.isEnablePublicationCheck())); settingsMap.save(); } @@ -170,6 +180,16 @@ public void setDelayUntilFirstReminder(int delayUntilFirstReminder) _delayUntilFirstReminder = delayUntilFirstReminder; } + public boolean isEnablePublicationCheck() + { + return _enablePublicationCheck; + } + + public void 
setEnablePublicationCheck(boolean enablePublicationCheck) + { + _enablePublicationCheck = enablePublicationCheck; + } + public @Nullable Date getReminderValidUntilDate(@NotNull DatasetStatus status) { return status.getLastReminderDate() == null ? null : addMonths(status.getLastReminderDate(), getReminderFrequency()); diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index 47d9d962..aa5644c8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -25,6 +25,7 @@ import org.labkey.panoramapublic.model.ExperimentAnnotations; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.JournalSubmission; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.query.DatasetStatusManager; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; @@ -43,6 +44,7 @@ public class PrivateDataReminderJob extends PipelineJob { private boolean _test; + private boolean _forcePublicationCheck; private List _experimentAnnotationsIds; private Journal _panoramaPublic; @@ -56,6 +58,11 @@ public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, b } public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, Journal panoramaPublic, List experimentAnnotationsIds, boolean test) + { + this(info, root, panoramaPublic, experimentAnnotationsIds, test, false); + } + + public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, Journal panoramaPublic, List experimentAnnotationsIds, boolean test, boolean forcePublicationCheck) { super("Panorama Public", info, root); 
setLogFile(root.getRootFileLike().toNioPathForWrite().resolve(FileUtil.makeFileNameWithTimestamp("PanoramaPublic-private-data-reminder", "log"))); @@ -63,6 +70,7 @@ public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, J _experimentAnnotationsIds = experimentAnnotationsIds; _test = test; + _forcePublicationCheck = forcePublicationCheck; } private static Journal getPanoramaPublic() @@ -164,6 +172,104 @@ public static ReminderDecision skip(String reason) { public String getReason() { return reason; } } + /** + * Container for publication search results used during reminder processing + */ + private static class PublicationCheckResult { + private final boolean publicationFound; + private final String pubMedId; + private final String searchStrategy; + + public PublicationCheckResult(boolean publicationFound, @Nullable String pubMedId, @Nullable String searchStrategy) { + this.publicationFound = publicationFound; + this.pubMedId = pubMedId; + this.searchStrategy = searchStrategy; + } + + public static PublicationCheckResult notFound() { + return new PublicationCheckResult(false, null, null); + } + + public static PublicationCheckResult found(@NotNull String pubMedId, @NotNull String searchStrategy) { + return new PublicationCheckResult(true, pubMedId, searchStrategy); + } + + public boolean isPublicationFound() { return publicationFound; } + public @Nullable String getPubMedId() { return pubMedId; } + public @Nullable String getSearchStrategy() { return searchStrategy; } + } + + /** + * Checks for publications associated with the experiment if enabled in settings + * @param expAnnotations The experiment to check + * @param settings The reminder settings + * @param forceCheck Force publication check regardless of global setting + * @param log Logger for diagnostic messages + * @return PublicationCheckResult with search results + */ + private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnnotations expAnnotations, + 
@NotNull PrivateDataReminderSettings settings, + boolean forceCheck, + @NotNull Logger log) + { + // Check if publication checking is enabled (either globally or forced for this run) + if (!forceCheck && !settings.isEnablePublicationCheck()) + { + log.debug("Publication checking is disabled in settings"); + return PublicationCheckResult.notFound(); + } + + // Get existing DatasetStatus to check cached results and user dismissals + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(expAnnotations); + if (datasetStatus != null) + { + // If user has dismissed the publication suggestion, don't search again + if (Boolean.TRUE.equals(datasetStatus.getUserDismissedPubMed())) + { + log.info(String.format("User has dismissed publication suggestion for experiment %d; skipping search", expAnnotations.getId())); + return PublicationCheckResult.notFound(); + } + + // If we already have a cached PubMed ID, use it + if (!StringUtils.isBlank(datasetStatus.getPotentialPubMedId())) + { + log.info(String.format("Using cached PubMed ID %s for experiment %d", + datasetStatus.getPotentialPubMedId(), expAnnotations.getId())); + return PublicationCheckResult.found( + datasetStatus.getPotentialPubMedId(), + datasetStatus.getPubMedSearchStrategy() + ); + } + } + + // Perform the publication search + log.info(String.format("Searching for publications for experiment %d", expAnnotations.getId())); + try + { + NcbiPublicationSearchService.NcbiPublicationSearchResult searchResult = + NcbiPublicationSearchService.searchForPublication(expAnnotations); + + if (searchResult.isFound()) + { + String pubMedIds = searchResult.getPmidsAsString(); + log.info(String.format("Found publication for experiment %d: PubMed ID %s (strategy: %s)", + expAnnotations.getId(), pubMedIds, searchResult.getSearchStrategy())); + return PublicationCheckResult.found(pubMedIds, searchResult.getSearchStrategy()); + } + else + { + log.info(String.format("No publication found for experiment %d", 
expAnnotations.getId())); + return PublicationCheckResult.notFound(); + } + } + catch (Exception e) + { + log.error(String.format("Error searching for publication for experiment %d: %s", + expAnnotations.getId(), e.getMessage()), e); + return PublicationCheckResult.notFound(); + } + } + @Override public void run() { @@ -265,18 +371,22 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex return; } + // Check for publications if enabled + PublicationCheckResult publicationResult = checkForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, processingResults._log); + if (!context.isTestMode()) { - postReminderMessage(expAnnotations, submission, announcement, submitter, context); + postReminderMessage(expAnnotations, submission, announcement, submitter, publicationResult, context); - updateDatasetStatus(expAnnotations); + updateDatasetStatus(expAnnotations, publicationResult); } processingResults.addProcessed(expAnnotations, announcement); } private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSubmission submission, - Announcement announcement, User submitter, ProcessingContext context) + Announcement announcement, User submitter, PublicationCheckResult publicationResult, + ProcessingContext context) { // Older message threads, pre March 2023, will not have the submitter or lab head on the notify list. Add them. 
List notifyList = new ArrayList<>(); @@ -294,11 +404,13 @@ private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSu notifyList, announcement, context.getAnnouncementsFolder(), - context.getJournalAdmin() + context.getJournalAdmin(), + publicationResult.isPublicationFound(), + publicationResult.getPubMedId() ); } - private void updateDatasetStatus(ExperimentAnnotations expAnnotations) + private void updateDatasetStatus(ExperimentAnnotations expAnnotations, PublicationCheckResult publicationResult) { DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(expAnnotations); if (datasetStatus == null) @@ -306,11 +418,27 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations) datasetStatus = new DatasetStatus(); datasetStatus.setExperimentAnnotationsId(expAnnotations.getId()); datasetStatus.setLastReminderDate(new Date()); + + // Save publication search results if found + if (publicationResult.isPublicationFound()) + { + datasetStatus.setPotentialPubMedId(publicationResult.getPubMedId()); + datasetStatus.setPubMedSearchStrategy(publicationResult.getSearchStrategy()); + } + DatasetStatusManager.save(datasetStatus, getUser()); } else { datasetStatus.setLastReminderDate(new Date()); + + // Save publication search results if found and not already cached + if (publicationResult.isPublicationFound() && StringUtils.isBlank(datasetStatus.getPotentialPubMedId())) + { + datasetStatus.setPotentialPubMedId(publicationResult.getPubMedId()); + datasetStatus.setPubMedSearchStrategy(publicationResult.getSearchStrategy()); + } + DatasetStatusManager.update(datasetStatus, getUser()); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp index af474fb9..a3d2873c 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp +++ 
b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp @@ -127,6 +127,19 @@ + + + <%=h(PrivateDataReminderSettings.PROP_ENABLE_PUBLICATION_CHECK)%> + + + /> +
+ When enabled, the system will search PubMed Central and PubMed for publications associated with private datasets. +
+ If a publication is found, submitters will be notified. +
+ + <%=button("Save").submit(true)%> <%=button("Cancel").href(panoramaPublicAdminUrl)%> diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp index 5708b3c5..2516815d 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp @@ -71,6 +71,15 @@ /> + + Check for NCBI Publications: + + /> +
+ When checked, the system will search for publications before sending reminders. +
+ + <%=button("Post Reminders").onClick("submitForm();")%>
From 583ceeb5ad1b5550d18a88b64112230e3f1c8d2c Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Fri, 13 Feb 2026 14:51:10 -0800 Subject: [PATCH 04/27] - Replaced pubmedId columns with publicationId since not all article matches will have PubMed Ids. - Post notification to support thread when publication suggestion is dismissed. --- .../panoramapublic-25.003-25.004.sql | 24 ++----- .../resources/schemas/panoramapublic.xml | 11 ++-- .../PanoramaPublicController.java | 61 ++++++++++++------ .../PanoramaPublicNotification.java | 60 +++++++++++++++--- .../panoramapublic/model/DatasetStatus.java | 62 ++++++++++++++----- .../ncbi/NcbiPublicationSearchService.java | 24 +++++-- .../pipeline/PrivateDataReminderJob.java | 56 ++++++++++------- 7 files changed, 203 insertions(+), 95 deletions(-) diff --git a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql index dabd520c..dab58907 100644 --- a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql +++ b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql @@ -1,20 +1,6 @@ -/* - * Copyright (c) 2026 LabKey Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ --- Add PubMed publication tracking columns to DatasetStatus -ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPubMedId VARCHAR(255); -ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PubMedSearchStrategy TEXT; -ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPubMed BOOLEAN DEFAULT FALSE; +-- Add publication tracking columns to DatasetStatus +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPublicationId VARCHAR(255); +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationType VARCHAR(50); +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationSearchStrategy TEXT; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPublication BOOLEAN DEFAULT FALSE; diff --git a/panoramapublic/resources/schemas/panoramapublic.xml b/panoramapublic/resources/schemas/panoramapublic.xml index 49e00a14..02ec496c 100644 --- a/panoramapublic/resources/schemas/panoramapublic.xml +++ b/panoramapublic/resources/schemas/panoramapublic.xml @@ -864,13 +864,16 @@ - - Cached PubMed ID(s) found for this dataset + + Cached publication ID (PubMed ID, PMC ID etc.) - + + Type of publication ID: PMID, PMC, etc. 
+ + Search strategy that found the publication - + User indicated the found publication is not related to their data diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 29b2ebae..8281d3f0 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10390,7 +10390,7 @@ public void setDataRegionName(String dataRegionName) } @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public abstract class UpdateDatasetStatusAction extends ConfirmAction + public abstract static class UpdateDatasetStatusAction extends ConfirmAction { protected ExperimentAnnotations _exptAnnotations; protected DatasetStatus _datasetStatus; @@ -10585,7 +10585,7 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public class DismissPubMedSuggestionAction extends UpdateDatasetStatusAction + public class DismissPublicationSuggestionAction extends UpdateDatasetStatusAction { @Override protected String getConfirmViewTitle() @@ -10596,22 +10596,26 @@ protected String getConfirmViewTitle() @Override protected String getConfirmViewMessage() { - String message = "You are dismissing the publication suggestion for your data on Panorama Public at " + _exptAnnotations.getShortUrl().renderShortURL(); - if (_datasetStatus != null && !StringUtils.isBlank(_datasetStatus.getPotentialPubMedId())) + String publicationLabel = ""; + if (_datasetStatus != null && !StringUtils.isBlank(_datasetStatus.getPublicationType())) { - message += ". 
We will no longer suggest PubMed ID " + _datasetStatus.getPotentialPubMedId() + " for this dataset"; + publicationLabel = _datasetStatus.getPublicationLabel(); } - return message; + + HtmlString message = HtmlString.of(DIV("You are dismissing the publication suggestion for your data on Panorama Public at " + _exptAnnotations.getShortUrl().renderShortURL(), BR(), BR(), + DIV("We will no loger suggest " + publicationLabel + " " + _datasetStatus.getPotentialPublicationId() + " for this dataset."))); + + return message.renderToString(); } @Override protected void doValidationForAction(Errors errors) { - if (_datasetStatus == null || StringUtils.isBlank(_datasetStatus.getPotentialPubMedId())) + if (_datasetStatus == null || StringUtils.isBlank(_datasetStatus.getPotentialPublicationId())) { errors.reject(ERROR_MSG, "No publication suggestion exists for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL()); } - else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPubMed())) + else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPublication())) { errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() + " has already been dismissed"); @@ -10621,13 +10625,16 @@ else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPubMed())) @Override protected void updateDatasetStatus(DatasetStatus datasetStatus) { - datasetStatus.setUserDismissedPubMed(true); + datasetStatus.setUserDismissedPublication(true); } @Override protected void postNotification() { - // No notification needed for dismissal + // Post a message to the support thread. 
+ JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(_exptAnnotations); + Journal journal = JournalManager.getJournal(submission.getJournalId()); + PanoramaPublicNotification.postPublicationDismissalMessage(journal, submission.getJournalExperiment(), _exptAnnotations, getUser(), _datasetStatus); } @Override @@ -10670,10 +10677,24 @@ public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Ex if (searchResult.isFound()) { - String pubMedIds = searchResult.getPmidsAsString(); - response.put("pubMedId", pubMedIds); + String publicationIds = searchResult.getPmidsAsString(); + String publicationType = searchResult.getPublicationType(); + response.put("publicationId", publicationIds); + response.put("publicationType", publicationType); response.put("searchStrategy", searchResult.getSearchStrategy()); - response.put("pubMedUrl", "https://pubmed.ncbi.nlm.nih.gov/" + searchResult.getPmids().get(0)); + + // Generate appropriate URL based on publication type + String firstId = searchResult.getPmids().get(0); + String publicationUrl; + if (DatasetStatus.TYPE_PMC.equals(publicationType)) + { + publicationUrl = "https://www.ncbi.nlm.nih.gov/pmc/articles/" + firstId + "/"; + } + else + { + publicationUrl = "https://pubmed.ncbi.nlm.nih.gov/" + firstId; + } + response.put("publicationUrl", publicationUrl); // Update DatasetStatus with the search result DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); @@ -10681,19 +10702,21 @@ public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Ex { datasetStatus = new DatasetStatus(); datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); - datasetStatus.setPotentialPubMedId(pubMedIds); - datasetStatus.setPubMedSearchStrategy(searchResult.getSearchStrategy()); + datasetStatus.setPotentialPublicationId(publicationIds); + datasetStatus.setPublicationType(publicationType); + 
datasetStatus.setPublicationSearchStrategy(searchResult.getSearchStrategy()); DatasetStatusManager.save(datasetStatus, getUser()); } else { - datasetStatus.setPotentialPubMedId(pubMedIds); - datasetStatus.setPubMedSearchStrategy(searchResult.getSearchStrategy()); - datasetStatus.setUserDismissedPubMed(false); // Reset dismissal flag + datasetStatus.setPotentialPublicationId(publicationIds); + datasetStatus.setPublicationType(publicationType); + datasetStatus.setPublicationSearchStrategy(searchResult.getSearchStrategy()); + datasetStatus.setUserDismissedPublication(false); // Reset dismissal flag DatasetStatusManager.update(datasetStatus, getUser()); } - response.put("message", "Publication found: PubMed ID " + pubMedIds); + response.put("message", "Publication found: " + datasetStatus.getPublicationLabel() + " " + publicationIds); } else { diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index b1310446..4f79ab25 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -23,6 +23,7 @@ import org.labkey.panoramapublic.datacite.DataCiteException; import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.model.DatasetStatus; import org.labkey.panoramapublic.model.ExperimentAnnotations; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.JournalExperiment; @@ -358,13 +359,43 @@ public static void postDataDeletionRequestMessage(@NotNull Journal journal, @Not postNotificationFullTitle(journal, je, messageBody.toString(), journalAdmin, messageTitle, AnnouncementService.StatusOption.Active, null); } + public static void postPublicationDismissalMessage(@NotNull Journal journal, @NotNull JournalExperiment 
je, @NotNull ExperimentAnnotations expAnnotations, User submitter, @Nullable org.labkey.panoramapublic.model.DatasetStatus datasetStatus) + { + User journalAdmin = JournalManager.getJournalAdminUser(journal); + if (journalAdmin == null) + { + throw new NotFoundException(String.format("Could not find an admin user for %s.", journal.getName())); + } + + String messageTitle = "Publication Suggestion Dismissed" +" - " + expAnnotations.getShortUrl().renderShortURL(); + + StringBuilder messageBody = new StringBuilder(); + messageBody.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); + messageBody.append("Thank you for letting us know that the suggested publication is not associated with your data on Panorama Public."); + + if (datasetStatus != null && !StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + { + messageBody.append(NL2).append(bold("Dismissed Publication:")).append(" ").append(datasetStatus.getPublicationLabel()).append(" ").append(datasetStatus.getPotentialPublicationId()); + } + + messageBody.append(NL2).append("We will no longer suggest this publication for your dataset. 
") + .append("If you would like to make your data public, you can do so at any time ") + .append("by clicking the \"Make Public\" button in your data folder, or by clicking this link: ") + .append(bold(link("Make Data Public", PanoramaPublicController.getMakePublicUrl(expAnnotations.getId(), expAnnotations.getContainer()).getURIString()))) + .append("."); + messageBody.append(NL2).append("Best regards,"); + messageBody.append(NL).append(getUserName(journalAdmin)); + + postNotificationFullTitle(journal, je, messageBody.toString(), journalAdmin, messageTitle, AnnouncementService.StatusOption.Closed, null); + } + public static void postPrivateDataReminderMessage(@NotNull Journal journal, @NotNull JournalSubmission js, @NotNull ExperimentAnnotations expAnnotations, @NotNull User submitter, @NotNull User messagePoster, List notifyUsers, @NotNull Announcement announcement, @NotNull Container announcementsContainer, @NotNull User journalAdmin, - boolean publicationFound, @Nullable String pubMedId) + boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType) { - String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, publicationFound, pubMedId); + String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, publicationFound, publicationId, publicationType); String title = publicationFound ? "Congratulations! 
We Found a Publication Associated with Your Data on Panorama Public" : "Action Required: Status Update for Your Private Data on Panorama Public"; @@ -374,7 +405,7 @@ public static void postPrivateDataReminderMessage(@NotNull Journal journal, @Not public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations exptAnnotations, @NotNull User submitter, @NotNull JournalSubmission js,@NotNull Announcement announcement, @NotNull Container announcementContainer, @NotNull User journalAdmin, - boolean publicationFound, @Nullable String pubMedId) + boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType) { String shortUrl = exptAnnotations.getShortUrl().renderShortURL(); String makePublicLink = PanoramaPublicController.getMakePublicUrl(exptAnnotations.getId(), exptAnnotations.getContainer()).getURIString(); @@ -393,7 +424,7 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations ActionURL requestDeletionUrl = new ActionURL(PanoramaPublicController.RequestDeletionAction.class, exptAnnotations.getContainer()) .addParameter("shortUrlEntityId",shortUrlEntityId); - ActionURL dismissPubMedUrl = new ActionURL(PanoramaPublicController.DismissPubMedSuggestionAction.class, exptAnnotations.getContainer()) + ActionURL dismissPublicationUrl = new ActionURL(PanoramaPublicController.DismissPublicationSuggestionAction.class, exptAnnotations.getContainer()) .addParameter("shortUrlEntityId", shortUrlEntityId); ExperimentAnnotations sourceExperiment = ExperimentAnnotationsManager.get(exptAnnotations.getSourceExperimentId()); @@ -401,19 +432,32 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations StringBuilder message = new StringBuilder(); message.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); - if (publicationFound && !StringUtils.isBlank(pubMedId)) + if (publicationFound && !StringUtils.isBlank(publicationId)) { // Message variant when a publication was 
found - String pubMedUrl = "https://pubmed.ncbi.nlm.nih.gov/" + pubMedId; + String publicationUrl; + String publicationLabel; + + if (DatasetStatus.TYPE_PMC.equals(publicationType)) + { + publicationUrl = "https://www.ncbi.nlm.nih.gov/pmc/articles/" + publicationId + "/"; + publicationLabel = "PMC ID " + publicationId; + } + else // Default to PMID + { + publicationUrl = "https://pubmed.ncbi.nlm.nih.gov/" + publicationId; + publicationLabel = "PubMed ID " + publicationId; + } + message.append("Great news! We found a publication that appears to be associated with your data on Panorama Public (") .append(shortUrl).append("), which has been private since ").append(dateString).append(".") .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) - .append(NL2).append(bold("Publication Found:")).append(" ").append(link("PubMed ID " + pubMedId, pubMedUrl)) + .append(NL2).append(bold("Publication Found:")).append(" ").append(link(publicationLabel, publicationUrl)) .append(NL2).append("Since your work has been published, we encourage you to make your data public so the research community can access it alongside your publication. 
") .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") .append(bold(link("Make Data Public", makePublicLink))).append(".") .append(NL2).append(bold("If this publication is not associated with your data:")) - .append(NL).append("Please let us know by ").append(bold(link("clicking here", dismissPubMedUrl.getURIString()))) + .append(NL).append("Please let us know by ").append(bold(link("clicking here", dismissPublicationUrl.getURIString()))) .append(", and we will stop suggesting this publication for your dataset.") .append(NL2).append(bold("Not ready to make your data public yet?")) .append(NL).append("If you need more time, you can:") diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index ea31c2cd..87842fd3 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -8,13 +8,17 @@ public class DatasetStatus extends DbEntity { + public static final String TYPE_PMC = "PMC"; + public static final String TYPE_PMID = "PMID"; + private int _experimentAnnotationsId; private Date _lastReminderDate; private Date _extensionRequestedDate; private Date _deletionRequestedDate; - private String _potentialPubMedId; - private String _pubMedSearchStrategy; - private Boolean _userDismissedPubMed; + private String _potentialPublicationId; + private String _publicationType; + private String _publicationSearchStrategy; + private Boolean _userDismissedPublication; public int getExperimentAnnotationsId() { @@ -76,33 +80,61 @@ public boolean reminderSent() return _lastReminderDate != null; } - public String getPotentialPubMedId() + public String getPotentialPublicationId() + { + return _potentialPublicationId; + } + + public void setPotentialPublicationId(String potentialPublicationId) { - return _potentialPubMedId; + 
_potentialPublicationId = potentialPublicationId; } - public void setPotentialPubMedId(String potentialPubMedId) + public String getPublicationType() { - _potentialPubMedId = potentialPubMedId; + return _publicationType; + } + + public void setPublicationType(String publicationType) + { + _publicationType = publicationType; + } + + public String getPublicationLabel() + { + if (_publicationType == null) + { + return ""; + } + + switch (_publicationType) + { + case TYPE_PMC: + return "PMC ID"; + case TYPE_PMID: + return "PubMed ID"; + default: + return _publicationType; + } } - public String getPubMedSearchStrategy() + public String getPublicationSearchStrategy() { - return _pubMedSearchStrategy; + return _publicationSearchStrategy; } - public void setPubMedSearchStrategy(String pubMedSearchStrategy) + public void setPublicationSearchStrategy(String publicationSearchStrategy) { - _pubMedSearchStrategy = pubMedSearchStrategy; + _publicationSearchStrategy = publicationSearchStrategy; } - public Boolean getUserDismissedPubMed() + public Boolean getUserDismissedPublication() { - return _userDismissedPubMed; + return _userDismissedPublication; } - public void setUserDismissedPubMed(Boolean userDismissedPubMed) + public void setUserDismissedPublication(Boolean userDismissedPublication) { - _userDismissedPubMed = userDismissedPubMed; + _userDismissedPublication = userDismissedPublication; } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 59a7fcd9..9fa82730 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -9,6 +9,7 @@ import org.json.JSONObject; import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; +import 
org.labkey.panoramapublic.model.DatasetStatus; import org.labkey.panoramapublic.model.ExperimentAnnotations; import java.io.BufferedReader; @@ -648,7 +649,7 @@ private static String extractPmidFromPmcMetadata(JSONObject pmcMetadata, String } // Fallback: use PMC ID if no PMID found - return "PMC" + pmcId; + return DatasetStatus.TYPE_PMC + pmcId; } /** @@ -665,6 +666,10 @@ private static NcbiPublicationSearchResult buildSearchResult(List .map(ArticleMatch::getPmid) .collect(Collectors.toList()); + // Determine publication type from first ID (prefer PMID over PMC) + String firstId = pmids.get(0); + String publicationType = firstId.startsWith(DatasetStatus.TYPE_PMC) ? DatasetStatus.TYPE_PMC : DatasetStatus.TYPE_PMID; + String strategy; if (articles.size() == 1) { @@ -683,7 +688,7 @@ private static NcbiPublicationSearchResult buildSearchResult(List .collect(Collectors.joining("\n")); } - return NcbiPublicationSearchResult.found(pmids, strategy, articles); + return NcbiPublicationSearchResult.found(pmids, publicationType, strategy, articles); } /** @@ -715,26 +720,28 @@ private static String quote(String str) public static class NcbiPublicationSearchResult { private final List _pmids; + private final String _publicationType; // "PMID" or "PMC" private final String _searchStrategy; private final boolean _found; private final List _articles; - private NcbiPublicationSearchResult(List pmids, String searchStrategy, boolean found, List articles) + private NcbiPublicationSearchResult(List pmids, String publicationType, String searchStrategy, boolean found, List articles) { _pmids = pmids != null ? pmids : Collections.emptyList(); + _publicationType = publicationType; _searchStrategy = searchStrategy; _found = found; _articles = articles != null ? 
articles : Collections.emptyList(); } - public static NcbiPublicationSearchResult found(List pmids, String strategy, List articles) + public static NcbiPublicationSearchResult found(List pmids, String publicationType, String strategy, List articles) { - return new NcbiPublicationSearchResult(pmids, strategy, true, articles); + return new NcbiPublicationSearchResult(pmids, publicationType, strategy, true, articles); } public static NcbiPublicationSearchResult notFound() { - return new NcbiPublicationSearchResult(null, null, false, null); + return new NcbiPublicationSearchResult(null, null, null, false, null); } public boolean isFound() @@ -747,6 +754,11 @@ public List getPmids() return _pmids; } + public String getPublicationType() + { + return _publicationType; + } + public String getSearchStrategy() { return _searchStrategy; diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index aa5644c8..b0b786a6 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -177,25 +177,28 @@ public static ReminderDecision skip(String reason) { */ private static class PublicationCheckResult { private final boolean publicationFound; - private final String pubMedId; + private final String publicationId; + private final String publicationType; private final String searchStrategy; - public PublicationCheckResult(boolean publicationFound, @Nullable String pubMedId, @Nullable String searchStrategy) { + public PublicationCheckResult(boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType, @Nullable String searchStrategy) { this.publicationFound = publicationFound; - this.pubMedId = pubMedId; + this.publicationId = publicationId; + this.publicationType = publicationType; this.searchStrategy = 
searchStrategy; } public static PublicationCheckResult notFound() { - return new PublicationCheckResult(false, null, null); + return new PublicationCheckResult(false, null, null, null); } - public static PublicationCheckResult found(@NotNull String pubMedId, @NotNull String searchStrategy) { - return new PublicationCheckResult(true, pubMedId, searchStrategy); + public static PublicationCheckResult found(@NotNull String publicationId, @NotNull String publicationType, @NotNull String searchStrategy) { + return new PublicationCheckResult(true, publicationId, publicationType, searchStrategy); } public boolean isPublicationFound() { return publicationFound; } - public @Nullable String getPubMedId() { return pubMedId; } + public @Nullable String getPublicationId() { return publicationId; } + public @Nullable String getPublicationType() { return publicationType; } public @Nullable String getSearchStrategy() { return searchStrategy; } } @@ -224,20 +227,21 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn if (datasetStatus != null) { // If user has dismissed the publication suggestion, don't search again - if (Boolean.TRUE.equals(datasetStatus.getUserDismissedPubMed())) + if (Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication())) { log.info(String.format("User has dismissed publication suggestion for experiment %d; skipping search", expAnnotations.getId())); return PublicationCheckResult.notFound(); } - // If we already have a cached PubMed ID, use it - if (!StringUtils.isBlank(datasetStatus.getPotentialPubMedId())) + // If we already have a cached publication ID, use it + if (!StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) { - log.info(String.format("Using cached PubMed ID %s for experiment %d", - datasetStatus.getPotentialPubMedId(), expAnnotations.getId())); + log.info(String.format("Using cached publication %s %s for experiment %d", + datasetStatus.getPublicationType(), 
datasetStatus.getPotentialPublicationId(), expAnnotations.getId())); return PublicationCheckResult.found( - datasetStatus.getPotentialPubMedId(), - datasetStatus.getPubMedSearchStrategy() + datasetStatus.getPotentialPublicationId(), + datasetStatus.getPublicationType(), + datasetStatus.getPublicationSearchStrategy() ); } } @@ -251,10 +255,11 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn if (searchResult.isFound()) { - String pubMedIds = searchResult.getPmidsAsString(); - log.info(String.format("Found publication for experiment %d: PubMed ID %s (strategy: %s)", - expAnnotations.getId(), pubMedIds, searchResult.getSearchStrategy())); - return PublicationCheckResult.found(pubMedIds, searchResult.getSearchStrategy()); + String publicationIds = searchResult.getPmidsAsString(); + String publicationType = searchResult.getPublicationType(); + log.info(String.format("Found publication for experiment %d: %s %s (strategy: %s)", + expAnnotations.getId(), publicationType, publicationIds, searchResult.getSearchStrategy())); + return PublicationCheckResult.found(publicationIds, publicationType, searchResult.getSearchStrategy()); } else { @@ -406,7 +411,8 @@ private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSu context.getAnnouncementsFolder(), context.getJournalAdmin(), publicationResult.isPublicationFound(), - publicationResult.getPubMedId() + publicationResult.getPublicationId(), + publicationResult.getPublicationType() ); } @@ -422,8 +428,9 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, Publicati // Save publication search results if found if (publicationResult.isPublicationFound()) { - datasetStatus.setPotentialPubMedId(publicationResult.getPubMedId()); - datasetStatus.setPubMedSearchStrategy(publicationResult.getSearchStrategy()); + datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); + datasetStatus.setPublicationType(publicationResult.getPublicationType()); 
+ datasetStatus.setPublicationSearchStrategy(publicationResult.getSearchStrategy()); } DatasetStatusManager.save(datasetStatus, getUser()); @@ -433,10 +440,11 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, Publicati datasetStatus.setLastReminderDate(new Date()); // Save publication search results if found and not already cached - if (publicationResult.isPublicationFound() && StringUtils.isBlank(datasetStatus.getPotentialPubMedId())) + if (publicationResult.isPublicationFound() && StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) { - datasetStatus.setPotentialPubMedId(publicationResult.getPubMedId()); - datasetStatus.setPubMedSearchStrategy(publicationResult.getSearchStrategy()); + datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); + datasetStatus.setPublicationType(publicationResult.getPublicationType()); + datasetStatus.setPublicationSearchStrategy(publicationResult.getSearchStrategy()); } DatasetStatusManager.update(datasetStatus, getUser()); From 69655ba73aa79483dc831e9ec4a67ca4b5b33b74 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Fri, 20 Feb 2026 15:54:18 -0800 Subject: [PATCH 05/27] Clean up NcbiPublicationSearchService. 
--- .../panoramapublic-25.003-25.004.sql | 2 +- .../resources/schemas/panoramapublic.xml | 4 +- .../PanoramaPublicController.java | 12 +- .../panoramapublic/model/DatasetStatus.java | 26 +- .../ncbi/NcbiPublicationSearchService.java | 341 ++++++++++++------ .../pipeline/PrivateDataReminderJob.java | 28 +- 6 files changed, 256 insertions(+), 157 deletions(-) diff --git a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql index dab58907..af421eba 100644 --- a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql +++ b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql @@ -2,5 +2,5 @@ -- Add publication tracking columns to DatasetStatus ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPublicationId VARCHAR(255); ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationType VARCHAR(50); -ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationSearchStrategy TEXT; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationMatchInfo TEXT; ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPublication BOOLEAN DEFAULT FALSE; diff --git a/panoramapublic/resources/schemas/panoramapublic.xml b/panoramapublic/resources/schemas/panoramapublic.xml index 02ec496c..3ad59e8e 100644 --- a/panoramapublic/resources/schemas/panoramapublic.xml +++ b/panoramapublic/resources/schemas/panoramapublic.xml @@ -870,8 +870,8 @@ Type of publication ID: PMID, PMC, etc. - - Search strategy that found the publication + + Information about which fields (e.g. PXD, Panorama link, title etc.) 
matched User indicated the found publication is not related to their data diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 8281d3f0..71fe21b5 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10652,7 +10652,7 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public static class CheckPubMedForDatasetAction extends ReadOnlyApiAction + public static class FindPublicationForDatasetAction extends ReadOnlyApiAction { @Override public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Exception @@ -10677,14 +10677,14 @@ public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Ex if (searchResult.isFound()) { - String publicationIds = searchResult.getPmidsAsString(); + String publicationIds = searchResult.getPublicationIdsAsString(); String publicationType = searchResult.getPublicationType(); response.put("publicationId", publicationIds); response.put("publicationType", publicationType); - response.put("searchStrategy", searchResult.getSearchStrategy()); + response.put("matchInfo", searchResult.getMatchInfo()); // Generate appropriate URL based on publication type - String firstId = searchResult.getPmids().get(0); + String firstId = searchResult.getPublicationIds().get(0); String publicationUrl; if (DatasetStatus.TYPE_PMC.equals(publicationType)) { @@ -10704,14 +10704,14 @@ public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Ex datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); datasetStatus.setPotentialPublicationId(publicationIds); datasetStatus.setPublicationType(publicationType); - 
datasetStatus.setPublicationSearchStrategy(searchResult.getSearchStrategy()); + datasetStatus.setPublicationMatchInfo(searchResult.getMatchInfo()); DatasetStatusManager.save(datasetStatus, getUser()); } else { datasetStatus.setPotentialPublicationId(publicationIds); datasetStatus.setPublicationType(publicationType); - datasetStatus.setPublicationSearchStrategy(searchResult.getSearchStrategy()); + datasetStatus.setPublicationMatchInfo(searchResult.getMatchInfo()); datasetStatus.setUserDismissedPublication(false); // Reset dismissal flag DatasetStatusManager.update(datasetStatus, getUser()); } diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 87842fd3..6a8c6452 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -17,7 +17,7 @@ public class DatasetStatus extends DbEntity private Date _deletionRequestedDate; private String _potentialPublicationId; private String _publicationType; - private String _publicationSearchStrategy; + private String _publicationMatchInfo; private Boolean _userDismissedPublication; public int getExperimentAnnotationsId() @@ -102,30 +102,22 @@ public void setPublicationType(String publicationType) public String getPublicationLabel() { - if (_publicationType == null) + PublicationType type = PublicationType.fromDatabaseValue(_publicationType); + if (type == null) { - return ""; - } - - switch (_publicationType) - { - case TYPE_PMC: - return "PMC ID"; - case TYPE_PMID: - return "PubMed ID"; - default: - return _publicationType; + return _publicationType != null ? 
_publicationType : ""; } + return type.getLabel(); } - public String getPublicationSearchStrategy() + public String getPublicationMatchInfo() { - return _publicationSearchStrategy; + return _publicationMatchInfo; } - public void setPublicationSearchStrategy(String publicationSearchStrategy) + public void setPublicationMatchInfo(String publicationMatchInfo) { - _publicationSearchStrategy = publicationSearchStrategy; + _publicationMatchInfo = publicationMatchInfo; } public Boolean getUserDismissedPublication() diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 9fa82730..74eeef4d 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -1,16 +1,17 @@ package org.labkey.panoramapublic.ncbi; import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.json.JSONArray; import org.json.JSONObject; +import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.model.DatasetStatus; import org.labkey.panoramapublic.model.ExperimentAnnotations; +import org.labkey.panoramapublic.model.PublicationType; import java.io.BufferedReader; import java.io.InputStreamReader; @@ -43,7 +44,13 @@ public class NcbiPublicationSearchService private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec private static final int MAX_RESULTS = 5; private static final int TIMEOUT_MS = 10000; // 10 seconds - private static final String NCBI_EMAIL = "panoramapublic@proteinms.net"; + private static final String NCBI_EMAIL = 
"panorama@proteinms.net"; + + // Preprint indicators + private static final String[] PREPRINT_INDICATORS = { + "preprint", "biorxiv", "medrxiv", "chemrxiv", "arxiv", + "research square", "preprints.org" + }; // Title keyword extraction private static final int MIN_KEYWORD_LENGTH = 5; @@ -57,32 +64,72 @@ public class NcbiPublicationSearchService "comparison", "evaluation" ); - // Preprint indicators - private static final String[] PREPRINT_INDICATORS = { - "preprint", "biorxiv", "medrxiv", "chemrxiv", "arxiv", - "research square", "preprints.org" - }; + private static final int MAX_RESULTS_LIMIT = 5; + + private static Logger getLog(@Nullable Logger logger) + { + return logger != null ? logger : LOG; + } /** * Search for publications associated with the given experiment. - * Search PubMed Central first, then fall back to PubMed if needed. + * Returns at most 1 result. */ public static NcbiPublicationSearchResult searchForPublication(@NotNull ExperimentAnnotations expAnnotations) { - LOG.info("Starting publication search for experiment: " + expAnnotations.getId()); + return searchForPublication(expAnnotations, 1, null); + } + + /** + * Search for publications associated with the given experiment. + * Search PubMed Central first, then fall back to PubMed if needed. 
+ * @param maxResults maximum number of article matches to return (capped at 5) + * @param logger optional logger; when null, uses the class logger + */ + public static NcbiPublicationSearchResult searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger) + { + Logger log = getLog(logger); + maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS_LIMIT)); + log.info("Starting publication search for experiment: " + expAnnotations.getId()); + + List matchedArticles = searchPmc(expAnnotations, log); + // If no PMC results, fall back to PubMed + if (matchedArticles.isEmpty()) + { + log.info("No PMC articles found, trying PubMed fallback"); + matchedArticles = searchPubMed(expAnnotations, log); + } + + // Build and return result + if (matchedArticles.isEmpty()) + { + log.info("No publications found"); + return NcbiPublicationSearchResult.notFound(); + } + + if (matchedArticles.size() > maxResults) + { + matchedArticles = matchedArticles.subList(0, maxResults); + } + log.info("Returning " + matchedArticles.size() + StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); + return buildSearchResult(matchedArticles); + } + + private static @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) + { // Track PMC IDs found by each strategy Map> pmcIdsByStrategy = new HashMap<>(); // Step 1: Search PMC by PX ID if (!StringUtils.isBlank(expAnnotations.getPxid())) { - LOG.info("Searching PMC by PX ID: " + expAnnotations.getPxid()); + log.debug("Searching PMC by PX ID: " + expAnnotations.getPxid()); List ids = searchPmc(quote(expAnnotations.getPxid())); if (!ids.isEmpty()) { pmcIdsByStrategy.put("PMC_PX_ID", ids); - LOG.info("Found " + ids.size() + " PMC articles by PX ID"); + log.debug("Found " + ids.size() + " PMC articles by PX ID"); } rateLimit(); } @@ -91,12 +138,12 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime if (expAnnotations.getShortUrl() != 
null) { String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); - LOG.info("Searching PMC by Panorama URL: " + panoramaUrl); + log.debug("Searching PMC by Panorama URL: " + panoramaUrl); List ids = searchPmc(quote(panoramaUrl)); if (!ids.isEmpty()) { pmcIdsByStrategy.put("PMC_PanoramaURL", ids); - LOG.info("Found " + ids.size() + " PMC articles by Panorama URL"); + log.debug("Found " + ids.size() + " PMC articles by Panorama URL"); } rateLimit(); } @@ -107,12 +154,12 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime String doiUrl = expAnnotations.getDoi().startsWith("http") ? expAnnotations.getDoi() : DataCiteService.toUrl(expAnnotations.getDoi()); - LOG.info("Searching PMC by DOI: " + doiUrl); + log.debug("Searching PMC by DOI: " + doiUrl); List ids = searchPmc(quote(doiUrl)); if (!ids.isEmpty()) { pmcIdsByStrategy.put("PMC_DOI", ids); - LOG.info("Found " + ids.size() + " PMC articles by DOI"); + log.debug("Found " + ids.size() + " PMC articles by DOI"); } rateLimit(); } @@ -131,30 +178,13 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime } } - LOG.info("Total unique PMC IDs found: " + uniquePmcIds.size()); + log.info("Total unique PMC IDs found: " + uniquePmcIds.size()); // Fetch and verify PMC articles - List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations); + List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); // Apply priority filtering - List filteredArticles = applyPriorityFiltering(pmcArticles); - - // If no PMC results, fall back to PubMed - if (filteredArticles.isEmpty()) - { - LOG.info("No PMC articles found, trying PubMed fallback"); - filteredArticles = searchPubMedFallback(expAnnotations); - } - - // Build and return result - if (filteredArticles.isEmpty()) - { - LOG.info("No publications found"); - return NcbiPublicationSearchResult.notFound(); - } - - LOG.info("Found " + 
filteredArticles.size() + " publication(s)"); - return buildSearchResult(filteredArticles); + return applyPriorityFiltering(pmcArticles, log); } /** @@ -323,7 +353,8 @@ private static Map fetchMetadata(Collection ids, Str private static List fetchAndVerifyPmcArticles( Set pmcIds, Map> idToStrategies, - ExperimentAnnotations expAnnotations) + ExperimentAnnotations expAnnotations, + Logger log) { if (pmcIds.isEmpty()) return Collections.emptyList(); @@ -346,7 +377,7 @@ private static List fetchAndVerifyPmcArticles( { String source = articleData.optString("source", ""); String journal = articleData.optString("fulljournalname", ""); - LOG.info("Excluded preprint PMC" + pmcId + ": source=\"" + source + "\", journal=\"" + journal + "\""); + log.info("Excluded preprint PMC" + pmcId + ": source=\"" + source + "\", journal=\"" + journal + "\""); continue; } @@ -354,25 +385,33 @@ private static List fetchAndVerifyPmcArticles( boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); - List matchedBy = new ArrayList<>(); - if (authorMatch) matchedBy.add("Author"); - if (titleMatch) matchedBy.add("Title"); - - // Extract PMID from PMC metadata - String pmid = extractPmidFromPmcMetadata(articleData, pmcId); + // Extract publication ID and determine type + String publicationId = extractPmidFromPmcMetadata(articleData, pmcId); + PublicationType publicationType = publicationId.startsWith(DatasetStatus.TYPE_PMC) + ? 
PublicationType.PMC + : PublicationType.PMID; // Get strategies that found this article List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); + // Map strategies to boolean flags + boolean foundByPxId = strategies.contains("PMC_PX_ID"); + boolean foundByUrl = strategies.contains("PMC_PanoramaURL"); + boolean foundByDoi = strategies.contains("PMC_DOI"); + ArticleMatch article = new ArticleMatch( - pmid, - String.join(", ", strategies), - String.join(", ", matchedBy) + publicationId, + publicationType, + foundByPxId, + foundByUrl, + foundByDoi, + authorMatch, + titleMatch ); articles.add(article); - LOG.info("PMC" + pmcId + " -> PMID " + pmid + " | Found by: " + article.getFoundBy() + - " | Matched: " + article.getMatchedBy()); + log.info("PMC" + pmcId + " -> " + publicationType + " " + publicationId + + " | Match info: " + article.getMatchInfo()); } return articles; @@ -381,18 +420,24 @@ private static List fetchAndVerifyPmcArticles( /** * Apply priority filtering to narrow down results */ - private static List applyPriorityFiltering(List articles) + private static List applyPriorityFiltering(List articles, Logger log) { if (articles.size() <= 1) return articles; // Priority 1: Filter to articles found by multiple data IDs (most reliable) List multipleIds = articles.stream() - .filter(a -> a.getFoundBy().split(", ").length >= 2) + .filter(a -> { + int count = 0; + if (a.matchesProteomeXchangeId()) count++; + if (a.matchesPanoramaUrl()) count++; + if (a.matchesDoi()) count++; + return count >= 2; + }) .collect(Collectors.toList()); if (!multipleIds.isEmpty()) { - LOG.info("Filtered to " + multipleIds.size() + " article(s) found by multiple IDs"); + log.debug("Filtered to " + multipleIds.size() + " article(s) found by multiple IDs"); articles = multipleIds; } @@ -400,12 +445,12 @@ private static List applyPriorityFiltering(List arti // Priority 2: Filter to articles with both Author AND Title match List bothMatches = articles.stream() - 
.filter(a -> "Author, Title".equals(a.getMatchedBy())) + .filter(a -> a.matchesAuthor() && a.matchesTitle()) .collect(Collectors.toList()); if (!bothMatches.isEmpty()) { - LOG.info("Filtered to " + bothMatches.size() + " article(s) with both Author and Title match"); + log.debug("Filtered to " + bothMatches.size() + " article(s) with both Author and Title match"); return bothMatches; } @@ -415,7 +460,7 @@ private static List applyPriorityFiltering(List arti /** * Fall back to PubMed search if PMC finds nothing */ - private static List searchPubMedFallback(ExperimentAnnotations expAnnotations) + private static List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) { String firstName = expAnnotations.getSubmitterUser() != null ? expAnnotations.getSubmitterUser().getFirstName() : null; @@ -425,7 +470,7 @@ private static List searchPubMedFallback(ExperimentAnnotations exp if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName) || StringUtils.isBlank(title)) { - LOG.info("Cannot perform PubMed fallback - missing author or title information"); + log.info("Cannot perform PubMed fallback - missing author or title information"); return Collections.emptyList(); } @@ -433,16 +478,16 @@ private static List searchPubMedFallback(ExperimentAnnotations exp String query = String.format("%s %s[Author] AND %s NOT preprint[Publication Type]", lastName, firstName, title); - LOG.info("PubMed fallback query: " + query); + log.debug("PubMed fallback query: " + query); List pmids = searchPubMed(query); if (pmids.isEmpty()) { - LOG.info("PubMed fallback found no results"); + log.info("PubMed fallback found no results"); return Collections.emptyList(); } - LOG.info("PubMed fallback found " + pmids.size() + " result(s), verifying..."); + log.info("PubMed fallback found " + pmids.size() + " result(s), verifying..."); // Fetch metadata and verify Map metadata = fetchPubMedMetadata(pmids); @@ -456,7 +501,7 @@ private static List searchPubMedFallback(ExperimentAnnotations 
exp // Filter out preprints if (isPreprint(articleData)) { - LOG.info("Excluded preprint PMID " + pmid); + log.info("Excluded preprint PMID " + pmid); continue; } @@ -467,13 +512,21 @@ private static List searchPubMedFallback(ExperimentAnnotations exp // Only accept if BOTH match if (authorMatch && titleMatch) { - ArticleMatch article = new ArticleMatch(pmid, "PubMed_Author+Title", "Author, Title"); + ArticleMatch article = new ArticleMatch( + pmid, + PublicationType.PMID, + false, // Not found by PX ID + false, // Not found by Panorama URL + false, // Not found by DOI + true, // Matched by author + true // Matched by title + ); articles.add(article); - LOG.info("PMID " + pmid + " verified with both Author and Title match"); + log.info("PMID " + pmid + " verified with both Author and Title match"); } } - LOG.info("Verified " + articles.size() + " of " + pmids.size() + " PubMed results"); + log.info("Verified " + articles.size() + " of " + pmids.size() + " PubMed results"); return articles; } @@ -662,33 +715,20 @@ private static NcbiPublicationSearchResult buildSearchResult(List return NcbiPublicationSearchResult.notFound(); } - List pmids = articles.stream() - .map(ArticleMatch::getPmid) - .collect(Collectors.toList()); - - // Determine publication type from first ID (prefer PMID over PMC) - String firstId = pmids.get(0); - String publicationType = firstId.startsWith(DatasetStatus.TYPE_PMC) ? DatasetStatus.TYPE_PMC : DatasetStatus.TYPE_PMID; - - String strategy; + String matchInfo; if (articles.size() == 1) { - ArticleMatch article = articles.get(0); - String matchInfo = article.getMatchedBy().isEmpty() ? "" : "(" + article.getMatchedBy() + ")"; - strategy = article.getFoundBy() + matchInfo; + matchInfo = articles.get(0).getMatchInfo(); } else { - // Multiple articles: format each on separate line - strategy = articles.stream() - .map(a -> { - String matchInfo = a.getMatchedBy().isEmpty() ? 
"" : "(" + a.getMatchedBy() + ")"; - return a.getPmid() + ": " + a.getFoundBy() + matchInfo; - }) + // Multiple articles: prefix each with publication ID + matchInfo = articles.stream() + .map(a -> a.getPublicationId() + ": " + a.getMatchInfo()) .collect(Collectors.joining("\n")); } - return NcbiPublicationSearchResult.found(pmids, publicationType, strategy, articles); + return NcbiPublicationSearchResult.found(matchInfo, articles); } /** @@ -719,59 +759,80 @@ private static String quote(String str) */ public static class NcbiPublicationSearchResult { - private final List _pmids; - private final String _publicationType; // "PMID" or "PMC" - private final String _searchStrategy; - private final boolean _found; + private final String _matchInfo; private final List _articles; - private NcbiPublicationSearchResult(List pmids, String publicationType, String searchStrategy, boolean found, List articles) + private NcbiPublicationSearchResult(String matchInfo, List articles) { - _pmids = pmids != null ? pmids : Collections.emptyList(); - _publicationType = publicationType; - _searchStrategy = searchStrategy; - _found = found; + _matchInfo = matchInfo; _articles = articles != null ? 
articles : Collections.emptyList(); } - public static NcbiPublicationSearchResult found(List pmids, String publicationType, String strategy, List articles) + public static NcbiPublicationSearchResult found(String matchInfo, List articles) { - return new NcbiPublicationSearchResult(pmids, publicationType, strategy, true, articles); + return new NcbiPublicationSearchResult(matchInfo, articles); } public static NcbiPublicationSearchResult notFound() { - return new NcbiPublicationSearchResult(null, null, null, false, null); + return new NcbiPublicationSearchResult(null, null); } public boolean isFound() { - return _found; + return !_articles.isEmpty(); } - public List getPmids() + public String getMatchInfo() { - return _pmids; + return _matchInfo; } - public String getPublicationType() + public List getArticles() { - return _publicationType; + return _articles; } - public String getSearchStrategy() + /** + * Get list of publication IDs from all articles + */ + public List getPublicationIds() { - return _searchStrategy; + return _articles.stream() + .map(ArticleMatch::getPublicationId) + .collect(Collectors.toList()); } - public List getArticles() + /** + * Get publication IDs as comma-separated string + */ + public String getPublicationIdsAsString() { - return _articles; + return _articles.stream() + .map(ArticleMatch::getPublicationId) + .collect(Collectors.joining(", ")); + } + + /** + * Get publication type of the first article (primary result) + * Returns null if no articles found + */ + public String getPublicationType() + { + if (_articles.isEmpty()) + { + return null; + } + PublicationType type = _articles.get(0).getPublicationType(); + return type != null ? type.toDatabaseValue() : null; } - public String getPmidsAsString() + /** + * Get the primary (first) article match, or null if no articles found + */ + public ArticleMatch getPrimaryArticle() { - return String.join(", ", _pmids); + return _articles.isEmpty() ? 
null : _articles.get(0); } } @@ -780,30 +841,76 @@ public String getPmidsAsString() */ public static class ArticleMatch { - private final String _pmid; - private final String _foundBy; // e.g., "PMC_PX_ID, PMC_DOI" - private final String _matchedBy; // e.g., "Author, Title" + private final String _publicationId; + private final PublicationType _publicationType; + private final boolean _matchesProteomeXchangeId; + private final boolean _matchesPanoramaUrl; + private final boolean _matchesDoi; + private final boolean _matchesAuthor; + private final boolean _matchesTitle; + + public ArticleMatch(String publicationId, PublicationType publicationType, + boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, + boolean matchesAuthor, boolean matchesTitle) + { + _publicationId = publicationId; + _publicationType = publicationType; + _matchesProteomeXchangeId = matchesProteomeXchangeId; + _matchesPanoramaUrl = matchesPanoramaUrl; + _matchesDoi = matchesDoi; + _matchesAuthor = matchesAuthor; + _matchesTitle = matchesTitle; + } + + public String getPublicationId() + { + return _publicationId; + } + + public PublicationType getPublicationType() + { + return _publicationType; + } + + public boolean matchesProteomeXchangeId() + { + return _matchesProteomeXchangeId; + } + + public boolean matchesPanoramaUrl() + { + return _matchesPanoramaUrl; + } - public ArticleMatch(String pmid, String foundBy, String matchedBy) + public boolean matchesDoi() { - _pmid = pmid; - _foundBy = foundBy; - _matchedBy = matchedBy; + return _matchesDoi; } - public String getPmid() + public boolean matchesAuthor() { - return _pmid; + return _matchesAuthor; } - public String getFoundBy() + public boolean matchesTitle() { - return _foundBy; + return _matchesTitle; } - public String getMatchedBy() + /** + * Get a predictable representation of what matched for this article. + * This is what gets stored in the PublicationMatchInfo database column. 
+ * Example: "ProteomeXchange ID, Panorama URL, Author, Title" + */ + public String getMatchInfo() { - return _matchedBy; + List matches = new ArrayList<>(); + if (_matchesProteomeXchangeId) matches.add("ProteomeXchange ID"); + if (_matchesPanoramaUrl) matches.add("Panorama URL"); + if (_matchesDoi) matches.add("DOI"); + if (_matchesAuthor) matches.add("Author"); + if (_matchesTitle) matches.add("Title"); + return String.join(", ", matches); } } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index b0b786a6..775d2706 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -179,27 +179,27 @@ private static class PublicationCheckResult { private final boolean publicationFound; private final String publicationId; private final String publicationType; - private final String searchStrategy; + private final String matchInfo; - public PublicationCheckResult(boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType, @Nullable String searchStrategy) { + public PublicationCheckResult(boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType, @Nullable String matchInfo) { this.publicationFound = publicationFound; this.publicationId = publicationId; this.publicationType = publicationType; - this.searchStrategy = searchStrategy; + this.matchInfo = matchInfo; } public static PublicationCheckResult notFound() { return new PublicationCheckResult(false, null, null, null); } - public static PublicationCheckResult found(@NotNull String publicationId, @NotNull String publicationType, @NotNull String searchStrategy) { - return new PublicationCheckResult(true, publicationId, publicationType, searchStrategy); + public static PublicationCheckResult found(@NotNull 
String publicationId, @NotNull String publicationType, @NotNull String matchInfo) { + return new PublicationCheckResult(true, publicationId, publicationType, matchInfo); } public boolean isPublicationFound() { return publicationFound; } public @Nullable String getPublicationId() { return publicationId; } public @Nullable String getPublicationType() { return publicationType; } - public @Nullable String getSearchStrategy() { return searchStrategy; } + public @Nullable String getMatchInfo() { return matchInfo; } } /** @@ -241,7 +241,7 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn return PublicationCheckResult.found( datasetStatus.getPotentialPublicationId(), datasetStatus.getPublicationType(), - datasetStatus.getPublicationSearchStrategy() + datasetStatus.getPublicationMatchInfo() ); } } @@ -251,15 +251,15 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn try { NcbiPublicationSearchService.NcbiPublicationSearchResult searchResult = - NcbiPublicationSearchService.searchForPublication(expAnnotations); + NcbiPublicationSearchService.searchForPublication(expAnnotations, 1, log); if (searchResult.isFound()) { - String publicationIds = searchResult.getPmidsAsString(); + String publicationIds = searchResult.getPublicationIdsAsString(); String publicationType = searchResult.getPublicationType(); - log.info(String.format("Found publication for experiment %d: %s %s (strategy: %s)", - expAnnotations.getId(), publicationType, publicationIds, searchResult.getSearchStrategy())); - return PublicationCheckResult.found(publicationIds, publicationType, searchResult.getSearchStrategy()); + log.info(String.format("Found publication for experiment %d: %s %s (match info: %s)", + expAnnotations.getId(), publicationType, publicationIds, searchResult.getMatchInfo())); + return PublicationCheckResult.found(publicationIds, publicationType, searchResult.getMatchInfo()); } else { @@ -430,7 +430,7 @@ private void 
updateDatasetStatus(ExperimentAnnotations expAnnotations, Publicati { datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); datasetStatus.setPublicationType(publicationResult.getPublicationType()); - datasetStatus.setPublicationSearchStrategy(publicationResult.getSearchStrategy()); + datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); } DatasetStatusManager.save(datasetStatus, getUser()); @@ -444,7 +444,7 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, Publicati { datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); datasetStatus.setPublicationType(publicationResult.getPublicationType()); - datasetStatus.setPublicationSearchStrategy(publicationResult.getSearchStrategy()); + datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); } DatasetStatusManager.update(datasetStatus, getUser()); From 2100502a373a70cfb210f2f03afba09091fa5153 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Fri, 20 Feb 2026 18:44:54 -0800 Subject: [PATCH 06/27] - Extracted PublicationMatch into a top level class - Removed redundant constants - Removed redundant class PublicationCheckResult --- .../PanoramaPublicController.java | 63 ++- .../PanoramaPublicNotification.java | 28 +- .../panoramapublic/model/DatasetStatus.java | 6 +- .../ncbi/NcbiPublicationSearchService.java | 358 ++++-------------- .../panoramapublic/ncbi/PublicationMatch.java | 172 +++++++++ .../pipeline/PrivateDataReminderJob.java | 85 +---- 6 files changed, 308 insertions(+), 404 deletions(-) create mode 100644 panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 71fe21b5..bd676184 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -156,6 +156,7 @@ import org.labkey.panoramapublic.message.PrivateDataMessageScheduler; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.PublicationMatch; import org.labkey.panoramapublic.model.CatalogEntry; import org.labkey.panoramapublic.model.DataLicense; import org.labkey.panoramapublic.model.DatasetStatus; @@ -10585,8 +10586,9 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public class DismissPublicationSuggestionAction extends UpdateDatasetStatusAction + public static class DismissPublicationSuggestionAction extends UpdateDatasetStatusAction { + private PublicationMatch _publicationMatch; @Override protected String getConfirmViewTitle() { @@ -10596,14 +10598,9 @@ protected String getConfirmViewTitle() @Override protected String getConfirmViewMessage() { - String publicationLabel = ""; - if (_datasetStatus != null && !StringUtils.isBlank(_datasetStatus.getPublicationType())) - { - publicationLabel = _datasetStatus.getPublicationLabel(); - } - - HtmlString message = HtmlString.of(DIV("You are dismissing the publication suggestion for your data on Panorama Public at " + _exptAnnotations.getShortUrl().renderShortURL(), BR(), BR(), - DIV("We will no loger suggest " + publicationLabel + " " + _datasetStatus.getPotentialPublicationId() + " for this dataset."))); + HtmlString message = HtmlString.of(DIV("You are dismissing the publication suggestion for your data on Panorama Public at " + + _exptAnnotations.getShortUrl().renderShortURL(), BR(), BR(), + DIV("We will no longer suggest " + _publicationMatch.getPublicationLabel() + " for this dataset."))); return message.renderToString(); } @@ -10611,7 +10608,9 @@ protected String getConfirmViewMessage() @Override 
protected void doValidationForAction(Errors errors) { - if (_datasetStatus == null || StringUtils.isBlank(_datasetStatus.getPotentialPublicationId())) + _publicationMatch = PublicationMatch.fromDatasetStatus(_datasetStatus); + + if (_publicationMatch == null) { errors.reject(ERROR_MSG, "No publication suggestion exists for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL()); } @@ -10669,32 +10668,18 @@ public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Ex try { // Perform the publication search - NcbiPublicationSearchService.NcbiPublicationSearchResult searchResult = - NcbiPublicationSearchService.searchForPublication(exptAnnotations); + PublicationMatch articleMatch = + NcbiPublicationSearchService.searchForPublication(exptAnnotations, null); response.put("success", true); - response.put("publicationFound", searchResult.isFound()); + response.put("publicationFound", articleMatch != null); - if (searchResult.isFound()) + if (articleMatch != null) { - String publicationIds = searchResult.getPublicationIdsAsString(); - String publicationType = searchResult.getPublicationType(); - response.put("publicationId", publicationIds); - response.put("publicationType", publicationType); - response.put("matchInfo", searchResult.getMatchInfo()); - - // Generate appropriate URL based on publication type - String firstId = searchResult.getPublicationIds().get(0); - String publicationUrl; - if (DatasetStatus.TYPE_PMC.equals(publicationType)) - { - publicationUrl = "https://www.ncbi.nlm.nih.gov/pmc/articles/" + firstId + "/"; - } - else - { - publicationUrl = "https://pubmed.ncbi.nlm.nih.gov/" + firstId; - } - response.put("publicationUrl", publicationUrl); + response.put("publicationId", articleMatch.getPublicationId()); + response.put("publicationType", articleMatch.getPublicationType().name()); + response.put("matchInfo", articleMatch.getMatchInfo()); + response.put("publicationUrl", articleMatch.getPublicationUrl()); // Update 
DatasetStatus with the search result DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); @@ -10702,21 +10687,21 @@ public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Ex { datasetStatus = new DatasetStatus(); datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); - datasetStatus.setPotentialPublicationId(publicationIds); - datasetStatus.setPublicationType(publicationType); - datasetStatus.setPublicationMatchInfo(searchResult.getMatchInfo()); + datasetStatus.setPotentialPublicationId(articleMatch.getPublicationId()); + datasetStatus.setPublicationType(articleMatch.getPublicationType().name()); + datasetStatus.setPublicationMatchInfo(articleMatch.getMatchInfo()); DatasetStatusManager.save(datasetStatus, getUser()); } else { - datasetStatus.setPotentialPublicationId(publicationIds); - datasetStatus.setPublicationType(publicationType); - datasetStatus.setPublicationMatchInfo(searchResult.getMatchInfo()); + datasetStatus.setPotentialPublicationId(articleMatch.getPublicationId()); + datasetStatus.setPublicationType(articleMatch.getPublicationType().name()); + datasetStatus.setPublicationMatchInfo(articleMatch.getMatchInfo()); datasetStatus.setUserDismissedPublication(false); // Reset dismissal flag DatasetStatusManager.update(datasetStatus, getUser()); } - response.put("message", "Publication found: " + datasetStatus.getPublicationLabel() + " " + publicationIds); + response.put("message", "Publication found: " + articleMatch.getPublicationLabel()); } else { diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index 4f79ab25..eeb6688c 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -23,12 +23,12 @@ import org.labkey.panoramapublic.datacite.DataCiteException; 
import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; -import org.labkey.panoramapublic.model.DatasetStatus; import org.labkey.panoramapublic.model.ExperimentAnnotations; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.JournalExperiment; import org.labkey.panoramapublic.model.JournalSubmission; import org.labkey.panoramapublic.model.Submission; +import org.labkey.panoramapublic.ncbi.PublicationMatch; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; @@ -393,10 +393,10 @@ public static void postPublicationDismissalMessage(@NotNull Journal journal, @No public static void postPrivateDataReminderMessage(@NotNull Journal journal, @NotNull JournalSubmission js, @NotNull ExperimentAnnotations expAnnotations, @NotNull User submitter, @NotNull User messagePoster, List notifyUsers, @NotNull Announcement announcement, @NotNull Container announcementsContainer, @NotNull User journalAdmin, - boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType) + @Nullable PublicationMatch articleMatch) { - String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, publicationFound, publicationId, publicationType); - String title = publicationFound + String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, articleMatch); + String title = articleMatch != null ? "Congratulations! 
We Found a Publication Associated with Your Data on Panorama Public" : "Action Required: Status Update for Your Private Data on Panorama Public"; postNotificationFullTitle(journal, js.getJournalExperiment(), message, messagePoster, title, AnnouncementService.StatusOption.Closed, notifyUsers); @@ -405,7 +405,7 @@ public static void postPrivateDataReminderMessage(@NotNull Journal journal, @Not public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations exptAnnotations, @NotNull User submitter, @NotNull JournalSubmission js,@NotNull Announcement announcement, @NotNull Container announcementContainer, @NotNull User journalAdmin, - boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType) + @Nullable PublicationMatch articleMatch) { String shortUrl = exptAnnotations.getShortUrl().renderShortURL(); String makePublicLink = PanoramaPublicController.getMakePublicUrl(exptAnnotations.getId(), exptAnnotations.getContainer()).getURIString(); @@ -432,27 +432,13 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations StringBuilder message = new StringBuilder(); message.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); - if (publicationFound && !StringUtils.isBlank(publicationId)) + if (articleMatch != null) { // Message variant when a publication was found - String publicationUrl; - String publicationLabel; - - if (DatasetStatus.TYPE_PMC.equals(publicationType)) - { - publicationUrl = "https://www.ncbi.nlm.nih.gov/pmc/articles/" + publicationId + "/"; - publicationLabel = "PMC ID " + publicationId; - } - else // Default to PMID - { - publicationUrl = "https://pubmed.ncbi.nlm.nih.gov/" + publicationId; - publicationLabel = "PubMed ID " + publicationId; - } - message.append("Great news! 
We found a publication that appears to be associated with your data on Panorama Public (") .append(shortUrl).append("), which has been private since ").append(dateString).append(".") .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) - .append(NL2).append(bold("Publication Found:")).append(" ").append(link(publicationLabel, publicationUrl)) + .append(NL2).append(bold("Publication Found:")).append(" ").append(link(articleMatch.getPublicationLabel(), articleMatch.getPublicationUrl())) .append(NL2).append("Since your work has been published, we encourage you to make your data public so the research community can access it alongside your publication. ") .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") .append(bold(link("Make Data Public", makePublicLink))).append(".") diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 6a8c6452..5e6c6a3a 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -1,15 +1,13 @@ package org.labkey.panoramapublic.model; import org.jetbrains.annotations.Nullable; -import org.labkey.api.view.ShortURLRecord; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.PublicationMatch; import java.util.Date; public class DatasetStatus extends DbEntity { - public static final String TYPE_PMC = "PMC"; - public static final String TYPE_PMID = "PMID"; private int _experimentAnnotationsId; private Date _lastReminderDate; @@ -102,7 +100,7 @@ public void setPublicationType(String publicationType) public String getPublicationLabel() { - PublicationType type = PublicationType.fromDatabaseValue(_publicationType); + PublicationMatch.PublicationType type = 
PublicationMatch.PublicationType.fromString(_publicationType); if (type == null) { return _publicationType != null ? _publicationType : ""; diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 74eeef4d..bd44e203 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -9,9 +9,8 @@ import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; -import org.labkey.panoramapublic.model.DatasetStatus; import org.labkey.panoramapublic.model.ExperimentAnnotations; -import org.labkey.panoramapublic.model.PublicationType; +import org.labkey.panoramapublic.ncbi.PublicationMatch.PublicationType; import java.io.BufferedReader; import java.io.InputStreamReader; @@ -29,6 +28,10 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_DOI; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PANORAMA_URL; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PX_ID; + /** * Service for searching PubMed Central (PMC) and PubMed for publications associated with private Panorama Public datasets. */ @@ -64,8 +67,6 @@ public class NcbiPublicationSearchService "comparison", "evaluation" ); - private static final int MAX_RESULTS_LIMIT = 5; - private static Logger getLog(@Nullable Logger logger) { return logger != null ? logger : LOG; @@ -73,11 +74,13 @@ private static Logger getLog(@Nullable Logger logger) /** * Search for publications associated with the given experiment. - * Returns at most 1 result. 
+ * Returns the top result if multiple results are found + * @param logger optional logger; when null, uses the class logger */ - public static NcbiPublicationSearchResult searchForPublication(@NotNull ExperimentAnnotations expAnnotations) + public static PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) { - return searchForPublication(expAnnotations, 1, null); + List matches = searchForPublication(expAnnotations, 1, logger); + return matches.isEmpty() ? null : matches.get(0); } /** @@ -86,13 +89,13 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime * @param maxResults maximum number of article matches to return (capped at 5) * @param logger optional logger; when null, uses the class logger */ - public static NcbiPublicationSearchResult searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger) + public static List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger) { Logger log = getLog(logger); - maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS_LIMIT)); + maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS)); log.info("Starting publication search for experiment: " + expAnnotations.getId()); - List matchedArticles = searchPmc(expAnnotations, log); + List matchedArticles = searchPmc(expAnnotations, log); // If no PMC results, fall back to PubMed if (matchedArticles.isEmpty()) @@ -105,7 +108,7 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime if (matchedArticles.isEmpty()) { log.info("No publications found"); - return NcbiPublicationSearchResult.notFound(); + return Collections.emptyList(); } if (matchedArticles.size() > maxResults) @@ -113,10 +116,13 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime matchedArticles = matchedArticles.subList(0, maxResults); } log.info("Returning " + 
matchedArticles.size() + StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); - return buildSearchResult(matchedArticles); + return matchedArticles; } - private static @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) + /** + * Search PubMed Central + */ + private static @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) { // Track PMC IDs found by each strategy Map> pmcIdsByStrategy = new HashMap<>(); @@ -128,7 +134,7 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime List ids = searchPmc(quote(expAnnotations.getPxid())); if (!ids.isEmpty()) { - pmcIdsByStrategy.put("PMC_PX_ID", ids); + pmcIdsByStrategy.put(MATCH_PX_ID, ids); log.debug("Found " + ids.size() + " PMC articles by PX ID"); } rateLimit(); @@ -142,7 +148,7 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime List ids = searchPmc(quote(panoramaUrl)); if (!ids.isEmpty()) { - pmcIdsByStrategy.put("PMC_PanoramaURL", ids); + pmcIdsByStrategy.put(MATCH_PANORAMA_URL, ids); log.debug("Found " + ids.size() + " PMC articles by Panorama URL"); } rateLimit(); @@ -158,7 +164,7 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime List ids = searchPmc(quote(doiUrl)); if (!ids.isEmpty()) { - pmcIdsByStrategy.put("PMC_DOI", ids); + pmcIdsByStrategy.put(MATCH_DOI, ids); log.debug("Found " + ids.size() + " PMC articles by DOI"); } rateLimit(); @@ -181,7 +187,7 @@ public static NcbiPublicationSearchResult searchForPublication(@NotNull Experime log.info("Total unique PMC IDs found: " + uniquePmcIds.size()); // Fetch and verify PMC articles - List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); + List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); // Apply priority filtering return applyPriorityFiltering(pmcArticles, log); @@ -219,37 +225,17 @@ private static List 
executeSearch(String query, String database) "&retmode=json" + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8.toString()); - URL url = new URL(urlString); - conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - conn.setConnectTimeout(TIMEOUT_MS); - conn.setReadTimeout(TIMEOUT_MS); - - int responseCode = conn.getResponseCode(); - if (responseCode != 200) - { - LOG.warn("NCBI ESearch returned non-200 response: " + responseCode); - return Collections.emptyList(); - } - - BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); - StringBuilder response = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) - { - response.append(line); - } - reader.close(); + conn = openGet(urlString); // Parse JSON response - JSONObject jsonResponse = new JSONObject(response.toString()); - JSONObject esearchresult = jsonResponse.getJSONObject("esearchresult"); - JSONArray idlist = esearchresult.getJSONArray("idlist"); + JSONObject json = readJson(conn); + JSONObject eSearchResult = json.getJSONObject("esearchresult"); + JSONArray idList = eSearchResult.getJSONArray("idlist"); List ids = new ArrayList<>(); - for (int i = 0; i < idlist.length(); i++) + for (int i = 0; i < idList.length(); i++) { - ids.add(idlist.getString(i)); + ids.add(idList.getString(i)); } return ids; } @@ -264,6 +250,28 @@ private static List executeSearch(String query, String database) } } + private static HttpURLConnection openGet(String urlString) throws Exception + { + URL url = new URL(urlString); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setConnectTimeout(TIMEOUT_MS); + conn.setReadTimeout(TIMEOUT_MS); + return conn; + } + + private static JSONObject readJson(HttpURLConnection conn) throws Exception + { + try (BufferedReader reader = + new BufferedReader(new InputStreamReader(conn.getInputStream()))) + { + StringBuilder sb = new 
StringBuilder(); + String line; + while ((line = reader.readLine()) != null) sb.append(line); + return new JSONObject(sb.toString()); + } + } + /** * Fetch metadata for PMC articles using ESummary API */ @@ -297,11 +305,7 @@ private static Map fetchMetadata(Collection ids, Str "&retmode=json" + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8.toString()); - URL url = new URL(urlString); - conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - conn.setConnectTimeout(TIMEOUT_MS); - conn.setReadTimeout(TIMEOUT_MS); + conn = openGet(urlString); int responseCode = conn.getResponseCode(); if (responseCode != 200) @@ -310,18 +314,8 @@ private static Map fetchMetadata(Collection ids, Str return Collections.emptyMap(); } - BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); - StringBuilder response = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) - { - response.append(line); - } - reader.close(); - - // Parse JSON response - JSONObject jsonResponse = new JSONObject(response.toString()); - JSONObject result = jsonResponse.optJSONObject("result"); + JSONObject json = readJson(conn); + JSONObject result = json.optJSONObject("result"); if (result == null) return Collections.emptyMap(); // Extract metadata for each ID @@ -350,7 +344,7 @@ private static Map fetchMetadata(Collection ids, Str /** * Fetch and verify PMC articles (filter preprints, check author/title matches) */ - private static List fetchAndVerifyPmcArticles( + private static List fetchAndVerifyPmcArticles( Set pmcIds, Map> idToStrategies, ExperimentAnnotations expAnnotations, @@ -361,7 +355,7 @@ private static List fetchAndVerifyPmcArticles( // Fetch all metadata in batch Map metadata = fetchPmcMetadata(pmcIds); - List articles = new ArrayList<>(); + List articles = new ArrayList<>(); String firstName = expAnnotations.getSubmitterUser() != null ? 
expAnnotations.getSubmitterUser().getFirstName() : null; String lastName = expAnnotations.getSubmitterUser() != null @@ -385,21 +379,20 @@ private static List fetchAndVerifyPmcArticles( boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); - // Extract publication ID and determine type - String publicationId = extractPmidFromPmcMetadata(articleData, pmcId); - PublicationType publicationType = publicationId.startsWith(DatasetStatus.TYPE_PMC) - ? PublicationType.PMC - : PublicationType.PMID; + // Extract PubMed ID, if found + String pubMedId = extractPubMedId(articleData, pmcId); + String publicationId = pubMedId == null ? pmcId : pubMedId; + PublicationType publicationType = pubMedId == null ? PublicationType.PMC : PublicationType.PMID; // Get strategies that found this article List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); // Map strategies to boolean flags - boolean foundByPxId = strategies.contains("PMC_PX_ID"); - boolean foundByUrl = strategies.contains("PMC_PanoramaURL"); - boolean foundByDoi = strategies.contains("PMC_DOI"); + boolean foundByPxId = strategies.contains(MATCH_PX_ID); + boolean foundByUrl = strategies.contains(MATCH_PANORAMA_URL); + boolean foundByDoi = strategies.contains(MATCH_DOI); - ArticleMatch article = new ArticleMatch( + PublicationMatch article = new PublicationMatch( publicationId, publicationType, foundByPxId, @@ -420,19 +413,13 @@ private static List fetchAndVerifyPmcArticles( /** * Apply priority filtering to narrow down results */ - private static List applyPriorityFiltering(List articles, Logger log) + private static List applyPriorityFiltering(List articles, Logger log) { if (articles.size() <= 1) return articles; // Priority 1: Filter to articles found by multiple data IDs (most reliable) - List multipleIds = articles.stream() - .filter(a -> { - int count = 0; - if (a.matchesProteomeXchangeId())
count++; - if (a.matchesPanoramaUrl()) count++; - if (a.matchesDoi()) count++; - return count >= 2; - }) + List multipleIds = articles.stream() + .filter(a -> countDataIdMatches(a) >= 2) .collect(Collectors.toList()); if (!multipleIds.isEmpty()) @@ -444,7 +431,7 @@ private static List applyPriorityFiltering(List arti if (articles.size() <= 1) return articles; // Priority 2: Filter to articles with both Author AND Title match - List bothMatches = articles.stream() + List bothMatches = articles.stream() .filter(a -> a.matchesAuthor() && a.matchesTitle()) .collect(Collectors.toList()); @@ -457,10 +444,19 @@ private static List applyPriorityFiltering(List arti return articles; } + private static int countDataIdMatches(PublicationMatch a) + { + int count = 0; + if (a.matchesProteomeXchangeId()) count++; + if (a.matchesPanoramaUrl()) count++; + if (a.matchesDoi()) count++; + return count; + } + /** * Fall back to PubMed search if PMC finds nothing */ - private static List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) + private static List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) { String firstName = expAnnotations.getSubmitterUser() != null ? 
expAnnotations.getSubmitterUser().getFirstName() : null; @@ -492,7 +488,7 @@ private static List searchPubMed(ExperimentAnnotations expAnnotati // Fetch metadata and verify Map metadata = fetchPubMedMetadata(pmids); - List articles = new ArrayList<>(); + List articles = new ArrayList<>(); for (String pmid : pmids) { JSONObject articleData = metadata.get(pmid); @@ -512,7 +508,7 @@ private static List searchPubMed(ExperimentAnnotations expAnnotati // Only accept if BOTH match if (authorMatch && titleMatch) { - ArticleMatch article = new ArticleMatch( + PublicationMatch article = new PublicationMatch( pmid, PublicationType.PMID, false, // Not found by PX ID @@ -564,7 +560,7 @@ private static boolean checkAuthorMatch(JSONObject metadata, String firstName, S return false; } - String firstInitial = firstName.substring(0, 1).toUpperCase(); + String firstInitial = firstName.substring(0, 1).toLowerCase(); String lastNameLower = lastName.toLowerCase(); for (int i = 0; i < authors.length(); i++) @@ -578,7 +574,7 @@ private static boolean checkAuthorMatch(JSONObject metadata, String firstName, S if (authorName.startsWith(lastNameLower)) { String afterLastName = authorName.substring(lastNameLower.length()).trim(); - if (afterLastName.startsWith(firstInitial.toLowerCase())) + if (afterLastName.startsWith(firstInitial)) { return true; } @@ -685,7 +681,7 @@ private static List extractTitleKeywords(String title) /** * Extract PMID from PMC metadata */ - private static String extractPmidFromPmcMetadata(JSONObject pmcMetadata, String pmcId) + private static @Nullable String extractPubMedId(JSONObject pmcMetadata, String pmcId) { // PMC articles contain PMID in articleids array JSONArray articleIds = pmcMetadata.optJSONArray("articleids"); @@ -701,35 +697,9 @@ private static String extractPmidFromPmcMetadata(JSONObject pmcMetadata, String } } - // Fallback: use PMC ID if no PMID found - return DatasetStatus.TYPE_PMC + pmcId; + return null; } - /** - * Build search result from filtered 
articles - */ - private static NcbiPublicationSearchResult buildSearchResult(List articles) - { - if (articles.isEmpty()) - { - return NcbiPublicationSearchResult.notFound(); - } - - String matchInfo; - if (articles.size() == 1) - { - matchInfo = articles.get(0).getMatchInfo(); - } - else - { - // Multiple articles: prefix each with publication ID - matchInfo = articles.stream() - .map(a -> a.getPublicationId() + ": " + a.getMatchInfo()) - .collect(Collectors.joining("\n")); - } - - return NcbiPublicationSearchResult.found(matchInfo, articles); - } /** * Rate limiting: wait 400ms between API requests @@ -753,164 +723,4 @@ private static String quote(String str) { return "\"" + str + "\""; } - - /** - * Result of a publication search - */ - public static class NcbiPublicationSearchResult - { - private final String _matchInfo; - private final List _articles; - - private NcbiPublicationSearchResult(String matchInfo, List articles) - { - _matchInfo = matchInfo; - _articles = articles != null ? 
articles : Collections.emptyList(); - } - - public static NcbiPublicationSearchResult found(String matchInfo, List articles) - { - return new NcbiPublicationSearchResult(matchInfo, articles); - } - - public static NcbiPublicationSearchResult notFound() - { - return new NcbiPublicationSearchResult(null, null); - } - - public boolean isFound() - { - return !_articles.isEmpty(); - } - - public String getMatchInfo() - { - return _matchInfo; - } - - public List getArticles() - { - return _articles; - } - - /** - * Get list of publication IDs from all articles - */ - public List getPublicationIds() - { - return _articles.stream() - .map(ArticleMatch::getPublicationId) - .collect(Collectors.toList()); - } - - /** - * Get publication IDs as comma-separated string - */ - public String getPublicationIdsAsString() - { - return _articles.stream() - .map(ArticleMatch::getPublicationId) - .collect(Collectors.joining(", ")); - } - - /** - * Get publication type of the first article (primary result) - * Returns null if no articles found - */ - public String getPublicationType() - { - if (_articles.isEmpty()) - { - return null; - } - PublicationType type = _articles.get(0).getPublicationType(); - return type != null ? type.toDatabaseValue() : null; - } - - /** - * Get the primary (first) article match, or null if no articles found - */ - public ArticleMatch getPrimaryArticle() - { - return _articles.isEmpty() ? 
null : _articles.get(0); - } - } - - /** - * Information about a matched article - */ - public static class ArticleMatch - { - private final String _publicationId; - private final PublicationType _publicationType; - private final boolean _matchesProteomeXchangeId; - private final boolean _matchesPanoramaUrl; - private final boolean _matchesDoi; - private final boolean _matchesAuthor; - private final boolean _matchesTitle; - - public ArticleMatch(String publicationId, PublicationType publicationType, - boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, - boolean matchesAuthor, boolean matchesTitle) - { - _publicationId = publicationId; - _publicationType = publicationType; - _matchesProteomeXchangeId = matchesProteomeXchangeId; - _matchesPanoramaUrl = matchesPanoramaUrl; - _matchesDoi = matchesDoi; - _matchesAuthor = matchesAuthor; - _matchesTitle = matchesTitle; - } - - public String getPublicationId() - { - return _publicationId; - } - - public PublicationType getPublicationType() - { - return _publicationType; - } - - public boolean matchesProteomeXchangeId() - { - return _matchesProteomeXchangeId; - } - - public boolean matchesPanoramaUrl() - { - return _matchesPanoramaUrl; - } - - public boolean matchesDoi() - { - return _matchesDoi; - } - - public boolean matchesAuthor() - { - return _matchesAuthor; - } - - public boolean matchesTitle() - { - return _matchesTitle; - } - - /** - * Get a predictable representation of what matched for this article. - * This is what gets stored in the PublicationMatchInfo database column. 
- * Example: "ProteomeXchange ID, Panorama URL, Author, Title" - */ - public String getMatchInfo() - { - List matches = new ArrayList<>(); - if (_matchesProteomeXchangeId) matches.add("ProteomeXchange ID"); - if (_matchesPanoramaUrl) matches.add("Panorama URL"); - if (_matchesDoi) matches.add("DOI"); - if (_matchesAuthor) matches.add("Author"); - if (_matchesTitle) matches.add("Title"); - return String.join(", ", matches); - } - } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java new file mode 100644 index 00000000..9f241c15 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -0,0 +1,172 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.labkey.panoramapublic.model.DatasetStatus; +import java.util.ArrayList; +import java.util.List; + +/** + * Container for publication search article match + */ +public class PublicationMatch +{ + public static final String MATCH_PX_ID = "ProteomeXchange ID"; + public static final String MATCH_PANORAMA_URL = "Panorama URL"; + public static final String MATCH_DOI = "DOI"; + public static final String MATCH_AUTHOR = "Author"; + public static final String MATCH_TITLE = "Title"; + + private final String _publicationId; + private final PublicationType _publicationType; + private final boolean _matchesProteomeXchangeId; + private final boolean _matchesPanoramaUrl; + private final boolean _matchesDoi; + private final boolean _matchesAuthor; + private final boolean _matchesTitle; + + public enum PublicationType + { + PMID("PubMed ID"), + PMC("PMC ID"); + + private final String _label; + + PublicationType(String label) + { + _label = label; + } + + /** Human-readable label, e.g. "PubMed ID" or "PMC ID". 
*/ + public String getLabel() + { + return _label; + } + + public static @Nullable PublicationType fromString(@Nullable String value) + { + if (value == null) return null; + for (PublicationType type : values()) + { + if (type.name().equals(value)) return type; + } + return null; + } + } + + public PublicationMatch(String publicationId, PublicationType publicationType, + boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, + boolean matchesAuthor, boolean matchesTitle) + { + _publicationId = publicationId; + _publicationType = publicationType; + _matchesProteomeXchangeId = matchesProteomeXchangeId; + _matchesPanoramaUrl = matchesPanoramaUrl; + _matchesDoi = matchesDoi; + _matchesAuthor = matchesAuthor; + _matchesTitle = matchesTitle; + } + + public String getPublicationId() + { + return _publicationId; + } + + public PublicationType getPublicationType() + { + return _publicationType; + } + + public boolean matchesProteomeXchangeId() + { + return _matchesProteomeXchangeId; + } + + public boolean matchesPanoramaUrl() + { + return _matchesPanoramaUrl; + } + + public boolean matchesDoi() + { + return _matchesDoi; + } + + public boolean matchesAuthor() + { + return _matchesAuthor; + } + + public boolean matchesTitle() + { + return _matchesTitle; + } + + /** + * String representation of what matched for this article. + * This is what gets stored in the PublicationMatchInfo database column. + * Example: "ProteomeXchange ID, Panorama URL, Author, Title" + * + *

Use {@link #fromMatchInfo(String)} to reconstruct the individual flags. + */ + public String getMatchInfo() + { + List parts = new ArrayList<>(); + if (_matchesProteomeXchangeId) parts.add(MATCH_PX_ID); + if (_matchesPanoramaUrl) parts.add(MATCH_PANORAMA_URL); + if (_matchesDoi) parts.add(MATCH_DOI); + if (_matchesAuthor) parts.add(MATCH_AUTHOR); + if (_matchesTitle) parts.add(MATCH_TITLE); + return String.join(", ", parts); + } + + public String getPublicationUrl() + { + if (_publicationType == PublicationType.PMC) + { + return "https://www.ncbi.nlm.nih.gov/pmc/articles/" + _publicationId; + } + return "https://pubmed.ncbi.nlm.nih.gov/" + _publicationId; + } + + public String getPublicationLabel() + { + return _publicationType.getLabel() + " " + _publicationId; + } + + /** + * Build a PublicationMatch from a persisted DatasetStatus. + */ + public static @Nullable PublicationMatch fromDatasetStatus(@NotNull DatasetStatus datasetStatus) + { + if (StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + { + return null; + } + PublicationType type = PublicationType.fromString(datasetStatus.getPublicationType()); + if (type == null) + { + return null; + } + String matchInfo = datasetStatus.getPublicationMatchInfo(); + boolean pxId = false, url = false, doi = false, author = false, title = false; + if (!StringUtils.isBlank(matchInfo)) + { + for (String segment : matchInfo.split(",")) + { + switch (segment.trim()) + { + case MATCH_PX_ID -> pxId = true; + case MATCH_PANORAMA_URL -> url = true; + case MATCH_DOI -> doi = true; + case MATCH_AUTHOR -> author = true; + case MATCH_TITLE -> title = true; + default -> { /* unknown token – ignore */ } + } + } + } + return new PublicationMatch(datasetStatus.getPotentialPublicationId(), type, + pxId, url, doi, author, title); + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index 
775d2706..c45bbaf4 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -26,6 +26,7 @@ import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.JournalSubmission; import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.PublicationMatch; import org.labkey.panoramapublic.query.DatasetStatusManager; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; @@ -175,51 +176,24 @@ public static ReminderDecision skip(String reason) { /** * Container for publication search results used during reminder processing */ - private static class PublicationCheckResult { - private final boolean publicationFound; - private final String publicationId; - private final String publicationType; - private final String matchInfo; - - public PublicationCheckResult(boolean publicationFound, @Nullable String publicationId, @Nullable String publicationType, @Nullable String matchInfo) { - this.publicationFound = publicationFound; - this.publicationId = publicationId; - this.publicationType = publicationType; - this.matchInfo = matchInfo; - } - - public static PublicationCheckResult notFound() { - return new PublicationCheckResult(false, null, null, null); - } - - public static PublicationCheckResult found(@NotNull String publicationId, @NotNull String publicationType, @NotNull String matchInfo) { - return new PublicationCheckResult(true, publicationId, publicationType, matchInfo); - } - - public boolean isPublicationFound() { return publicationFound; } - public @Nullable String getPublicationId() { return publicationId; } - public @Nullable String getPublicationType() { return publicationType; } - public @Nullable String getMatchInfo() { return matchInfo; } - } - /** * Checks for publications associated with the 
experiment if enabled in settings * @param expAnnotations The experiment to check * @param settings The reminder settings * @param forceCheck Force publication check regardless of global setting * @param log Logger for diagnostic messages - * @return PublicationCheckResult with search results + * @return PublicationMatch with the search result, or null if no publication was found or the check was skipped */ - private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnnotations expAnnotations, - @NotNull PrivateDataReminderSettings settings, - boolean forceCheck, - @NotNull Logger log) + private static PublicationMatch checkForPublication(@NotNull ExperimentAnnotations expAnnotations, + @NotNull PrivateDataReminderSettings settings, + boolean forceCheck, + @NotNull Logger log) { // Check if publication checking is enabled (either globally or forced for this run) if (!forceCheck && !settings.isEnablePublicationCheck()) { log.debug("Publication checking is disabled in settings"); - return PublicationCheckResult.notFound(); + return null; } // Get existing DatasetStatus to check cached results and user dismissals @@ -230,7 +204,7 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn if (Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication())) { log.info(String.format("User has dismissed publication suggestion for experiment %d; skipping search", expAnnotations.getId())); - return PublicationCheckResult.notFound(); + return null; } // If we already have a cached publication ID, use it @@ -238,11 +212,7 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn { log.info(String.format("Using cached publication %s %s for experiment %d", datasetStatus.getPublicationType(), datasetStatus.getPotentialPublicationId(), expAnnotations.getId())); - return PublicationCheckResult.found( - datasetStatus.getPotentialPublicationId(), - datasetStatus.getPublicationType(), - datasetStatus.getPublicationMatchInfo() - ); + return
PublicationMatch.fromDatasetStatus(datasetStatus); } } @@ -250,28 +220,13 @@ private static PublicationCheckResult checkForPublication(@NotNull ExperimentAnn log.info(String.format("Searching for publications for experiment %d", expAnnotations.getId())); try { - NcbiPublicationSearchService.NcbiPublicationSearchResult searchResult = - NcbiPublicationSearchService.searchForPublication(expAnnotations, 1, log); - - if (searchResult.isFound()) - { - String publicationIds = searchResult.getPublicationIdsAsString(); - String publicationType = searchResult.getPublicationType(); - log.info(String.format("Found publication for experiment %d: %s %s (match info: %s)", - expAnnotations.getId(), publicationType, publicationIds, searchResult.getMatchInfo())); - return PublicationCheckResult.found(publicationIds, publicationType, searchResult.getMatchInfo()); - } - else - { - log.info(String.format("No publication found for experiment %d", expAnnotations.getId())); - return PublicationCheckResult.notFound(); - } + return NcbiPublicationSearchService.searchForPublication(expAnnotations, log); } catch (Exception e) { log.error(String.format("Error searching for publication for experiment %d: %s", expAnnotations.getId(), e.getMessage()), e); - return PublicationCheckResult.notFound(); + return null; } } @@ -377,7 +332,7 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex } // Check for publications if enabled - PublicationCheckResult publicationResult = checkForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, processingResults._log); + PublicationMatch publicationResult = checkForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, processingResults._log); if (!context.isTestMode()) { @@ -390,7 +345,7 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex } private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSubmission submission, - Announcement 
announcement, User submitter, PublicationCheckResult publicationResult, + Announcement announcement, User submitter, @Nullable PublicationMatch publicationResult, ProcessingContext context) { // Older message threads, pre March 2023, will not have the submitter or lab head on the notify list. Add them. @@ -410,13 +365,11 @@ private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSu announcement, context.getAnnouncementsFolder(), context.getJournalAdmin(), - publicationResult.isPublicationFound(), - publicationResult.getPublicationId(), - publicationResult.getPublicationType() + publicationResult ); } - private void updateDatasetStatus(ExperimentAnnotations expAnnotations, PublicationCheckResult publicationResult) + private void updateDatasetStatus(ExperimentAnnotations expAnnotations, @Nullable PublicationMatch publicationResult) { DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(expAnnotations); if (datasetStatus == null) @@ -426,10 +379,10 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, Publicati datasetStatus.setLastReminderDate(new Date()); // Save publication search results if found - if (publicationResult.isPublicationFound()) + if (publicationResult != null) { datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); - datasetStatus.setPublicationType(publicationResult.getPublicationType()); + datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); } @@ -440,10 +393,10 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, Publicati datasetStatus.setLastReminderDate(new Date()); // Save publication search results if found and not already cached - if (publicationResult.isPublicationFound() && StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + if (publicationResult != null && StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) { 
datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); - datasetStatus.setPublicationType(publicationResult.getPublicationType()); + datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); } From e4eae099b3a019116b46a9a4a753fb7b21f7eece Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Tue, 24 Feb 2026 15:55:01 -0800 Subject: [PATCH 07/27] Get date on publication and compare publication date with experiment create date in the sorting logic --- .../ncbi/NcbiPublicationSearchService.java | 155 ++++++++++++++---- .../panoramapublic/ncbi/PublicationMatch.java | 13 +- 2 files changed, 132 insertions(+), 36 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index bd44e203..dae41e4d 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -1,11 +1,14 @@ package org.labkey.panoramapublic.ncbi; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.json.JSONArray; +import org.json.JSONException; import org.json.JSONObject; +import org.labkey.api.util.JsonUtil; import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; @@ -13,14 +16,20 @@ import org.labkey.panoramapublic.ncbi.PublicationMatch.PublicationType; import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; import 
java.nio.charset.StandardCharsets; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -95,6 +104,7 @@ public static List searchForPublication(@NotNull ExperimentAnn maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS)); log.info("Starting publication search for experiment: " + expAnnotations.getId()); + // Search PubMed Central first List matchedArticles = searchPmc(expAnnotations, log); // If no PMC results, fall back to PubMed @@ -115,7 +125,7 @@ public static List searchForPublication(@NotNull ExperimentAnn { matchedArticles = matchedArticles.subList(0, maxResults); } - log.info("Returning " + matchedArticles.size() + StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); + log.info("Returning {}{}", matchedArticles.size(), StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); return matchedArticles; } @@ -130,12 +140,12 @@ public static List searchForPublication(@NotNull ExperimentAnn // Step 1: Search PMC by PX ID if (!StringUtils.isBlank(expAnnotations.getPxid())) { - log.debug("Searching PMC by PX ID: " + expAnnotations.getPxid()); + log.debug("Searching PMC by PX ID: {}", expAnnotations.getPxid()); List ids = searchPmc(quote(expAnnotations.getPxid())); if (!ids.isEmpty()) { pmcIdsByStrategy.put(MATCH_PX_ID, ids); - log.debug("Found " + ids.size() + " PMC articles by PX ID"); + log.debug("Found {} PMC articles by PX ID", ids.size()); } rateLimit(); } @@ -144,12 +154,12 @@ public static List searchForPublication(@NotNull ExperimentAnn if (expAnnotations.getShortUrl() != null) { String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); - log.debug("Searching PMC by Panorama URL: " + panoramaUrl); + log.debug("Searching PMC by Panorama URL: {}", panoramaUrl); List ids = 
searchPmc(quote(panoramaUrl)); if (!ids.isEmpty()) { pmcIdsByStrategy.put(MATCH_PANORAMA_URL, ids); - log.debug("Found " + ids.size() + " PMC articles by Panorama URL"); + log.debug("Found {} PMC articles by Panorama URL", ids.size()); } rateLimit(); } @@ -190,7 +200,7 @@ public static List searchForPublication(@NotNull ExperimentAnn List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); // Apply priority filtering - return applyPriorityFiltering(pmcArticles, log); + return applyPriorityFiltering(pmcArticles, expAnnotations.getCreated(), log); } /** @@ -217,18 +227,29 @@ private static List executeSearch(String query, String database) HttpURLConnection conn = null; try { - String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8.toString()); + String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); String urlString = ESEARCH_URL + "?db=" + database + "&term=" + encodedQuery + "&retmax=" + MAX_RESULTS + "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8.toString()); + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); conn = openGet(urlString); + int responseCode = conn.getResponseCode(); + if (responseCode != 200) + { + LOG.warn("NCBI ESearch returned non-200 response: " + responseCode); + return Collections.emptyList(); + } + // Parse JSON response JSONObject json = readJson(conn); + if (json == null) + { + return Collections.emptyList(); + } JSONObject eSearchResult = json.getJSONObject("esearchresult"); JSONArray idList = eSearchResult.getJSONArray("idlist"); @@ -239,7 +260,7 @@ private static List executeSearch(String query, String database) } return ids; } - catch (Exception e) + catch (IOException | JSONException e) { LOG.error("Error searching " + database + " with query: " + query, e); return Collections.emptyList(); @@ -250,7 +271,7 @@ private static List executeSearch(String query, String database) } } - private static HttpURLConnection 
openGet(String urlString) throws Exception + private static HttpURLConnection openGet(String urlString) throws IOException { URL url = new URL(urlString); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); @@ -260,38 +281,40 @@ private static HttpURLConnection openGet(String urlString) throws Exception return conn; } - private static JSONObject readJson(HttpURLConnection conn) throws Exception + private static @Nullable JSONObject readJson(HttpURLConnection conn) { - try (BufferedReader reader = - new BufferedReader(new InputStreamReader(conn.getInputStream()))) + try (InputStream in = conn.getInputStream()) { - StringBuilder sb = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) sb.append(line); - return new JSONObject(sb.toString()); + String response = IOUtils.toString(in, StandardCharsets.UTF_8); + return new JSONObject(response); + } + catch (IOException | JSONException e) + { + LOG.error("Error reading JSON response", e); + return null; } } /** * Fetch metadata for PMC articles using ESummary API */ - private static Map fetchPmcMetadata(Collection pmcIds) + private static Map fetchPmcMetadata(Collection pmcIds, Logger log) { - return fetchMetadata(pmcIds, "pmc"); + return fetchMetadata(pmcIds, "pmc", log); } /** * Fetch metadata for PubMed articles using ESummary API */ - private static Map fetchPubMedMetadata(Collection pmids) + private static Map fetchPubMedMetadata(Collection pmids, Logger log) { - return fetchMetadata(pmids, "pubmed"); + return fetchMetadata(pmids, "pubmed", log); } /** * Fetch metadata using NCBI ESummary API (batch request) */ - private static Map fetchMetadata(Collection ids, String database) + private static Map fetchMetadata(Collection ids, String database, Logger log) { if (ids.isEmpty()) return Collections.emptyMap(); @@ -301,20 +324,24 @@ private static Map fetchMetadata(Collection ids, Str String idString = String.join(",", ids); String urlString = ESUMMARY_URL + "?db=" + database + 
- "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8.toString()) + + "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8.toString()); + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); conn = openGet(urlString); int responseCode = conn.getResponseCode(); if (responseCode != 200) { - LOG.warn("NCBI ESummary returned non-200 response: " + responseCode); + log.warn("NCBI ESummary returned non-200 response: " + responseCode); return Collections.emptyMap(); } JSONObject json = readJson(conn); + if (json == null) + { + return Collections.emptyMap(); + } JSONObject result = json.optJSONObject("result"); if (result == null) return Collections.emptyMap(); @@ -330,9 +357,14 @@ private static Map fetchMetadata(Collection ids, Str } return metadata; } - catch (Exception e) + catch (IOException e) + { + log.error("Error fetching " + database + " metadata for IDs: " + ids, e); + return Collections.emptyMap(); + } + catch (JSONException e) { - LOG.error("Error fetching " + database + " metadata for IDs: " + ids, e); + log.error("Error fetching " + database + " metadata for IDs: " + ids, e); return Collections.emptyMap(); } finally @@ -353,7 +385,7 @@ private static List fetchAndVerifyPmcArticles( if (pmcIds.isEmpty()) return Collections.emptyList(); // Fetch all metadata in batch - Map metadata = fetchPmcMetadata(pmcIds); + Map metadata = fetchPmcMetadata(pmcIds, log); List articles = new ArrayList<>(); String firstName = expAnnotations.getSubmitterUser() != null @@ -392,6 +424,8 @@ private static List fetchAndVerifyPmcArticles( boolean foundByUrl = strategies.contains(MATCH_PANORAMA_URL); boolean foundByDoi = strategies.contains(MATCH_DOI); + Date pubDate = parsePublicationDate(articleData); + PublicationMatch article = new PublicationMatch( publicationId, publicationType, @@ -399,7 +433,8 @@ private static List fetchAndVerifyPmcArticles( foundByUrl, 
foundByDoi, authorMatch, - titleMatch + titleMatch, + pubDate ); articles.add(article); @@ -413,7 +448,7 @@ private static List fetchAndVerifyPmcArticles( /** * Apply priority filtering to narrow down results */ - private static List applyPriorityFiltering(List articles, Logger log) + private static List applyPriorityFiltering(List articles, @NotNull Date referenceDate, Logger log) { if (articles.size() <= 1) return articles; @@ -428,7 +463,7 @@ private static List applyPriorityFiltering(List bothMatches = articles.stream() @@ -438,9 +473,28 @@ private static List applyPriorityFiltering(List sortByDateProximity(List articles, @NotNull Date referenceDate) + { + if (articles.size() <= 1) + { + return articles; + } + long refTime = referenceDate.getTime(); + articles = new ArrayList<>(articles); + articles.sort(Comparator.comparingLong(a -> + a.getPublicationDate() != null ? Math.abs(a.getPublicationDate().getTime() - refTime) : Long.MAX_VALUE + )); return articles; } @@ -486,7 +540,7 @@ private static List searchPubMed(ExperimentAnnotations expAnno log.info("PubMed fallback found " + pmids.size() + " result(s), verifying..."); // Fetch metadata and verify - Map metadata = fetchPubMedMetadata(pmids); + Map metadata = fetchPubMedMetadata(pmids, log); List articles = new ArrayList<>(); for (String pmid : pmids) @@ -508,6 +562,7 @@ private static List searchPubMed(ExperimentAnnotations expAnno // Only accept if BOTH match if (authorMatch && titleMatch) { + Date pubDate = parsePublicationDate(articleData); PublicationMatch article = new PublicationMatch( pmid, PublicationType.PMID, @@ -515,7 +570,8 @@ private static List searchPubMed(ExperimentAnnotations expAnno false, // Not found by Panorama URL false, // Not found by DOI true, // Matched by author - true // Matched by title + true, // Matched by title + pubDate ); articles.add(article); log.info("PMID " + pmid + " verified with both Author and Title match"); @@ -700,6 +756,37 @@ private static List 
extractTitleKeywords(String title) return null; } + /** + * Parse publication date from ESummary metadata. + * Tries "pubdate" first, then "epubdate". Dates are typically in "YYYY Mon DD" or "YYYY Mon" format. + */ + private static @Nullable Date parsePublicationDate(JSONObject metadata) + { + String dateStr = metadata.optString("pubdate", ""); + if (StringUtils.isBlank(dateStr)) + { + dateStr = metadata.optString("epubdate", ""); + } + if (StringUtils.isBlank(dateStr)) + { + return null; + } + + // NCBI dates are typically "YYYY Mon DD", "YYYY Mon", or "YYYY" + String[] formats = {"yyyy MMM dd", "yyyy MMM", "yyyy"}; + for (String format : formats) + { + try + { + return new SimpleDateFormat(format).parse(dateStr); + } + catch (ParseException ignored) + { + } + } + LOG.debug("Unable to parse publication date: {}", dateStr); + return null; + } /** * Rate limiting: wait 400ms between API requests diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index 9f241c15..ede43124 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -5,6 +5,7 @@ import org.jetbrains.annotations.Nullable; import org.labkey.panoramapublic.model.DatasetStatus; import java.util.ArrayList; +import java.util.Date; import java.util.List; /** @@ -25,6 +26,7 @@ public class PublicationMatch private final boolean _matchesDoi; private final boolean _matchesAuthor; private final boolean _matchesTitle; + private final @Nullable Date _publicationDate; public enum PublicationType { @@ -57,7 +59,8 @@ public String getLabel() public PublicationMatch(String publicationId, PublicationType publicationType, boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, - boolean matchesAuthor, boolean matchesTitle) + boolean matchesAuthor, boolean matchesTitle, + @Nullable Date 
publicationDate) { _publicationId = publicationId; _publicationType = publicationType; @@ -66,6 +69,7 @@ public PublicationMatch(String publicationId, PublicationType publicationType, _matchesDoi = matchesDoi; _matchesAuthor = matchesAuthor; _matchesTitle = matchesTitle; + _publicationDate = publicationDate; } public String getPublicationId() @@ -103,6 +107,11 @@ public boolean matchesTitle() return _matchesTitle; } + public @Nullable Date getPublicationDate() + { + return _publicationDate; + } + /** * String representation of what matched for this article. * This is what gets stored in the PublicationMatchInfo database column. @@ -167,6 +176,6 @@ public String getPublicationLabel() } } return new PublicationMatch(datasetStatus.getPotentialPublicationId(), type, - pxId, url, doi, author, title); + pxId, url, doi, author, title, null); } } From 682c0b6f6154400e4724e061fca8d11378e52c3e Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Tue, 24 Feb 2026 19:05:40 -0800 Subject: [PATCH 08/27] - Use Apache HttpClient5 classes. - Parameterized log calls. 
--- .../ncbi/NcbiPublicationSearchService.java | 210 ++++++++---------- 1 file changed, 91 insertions(+), 119 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index dae41e4d..85573ec2 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -1,26 +1,28 @@ package org.labkey.panoramapublic.ncbi; -import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.config.ConnectionConfig; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.client5.http.impl.io.BasicHttpClientConnectionManager; +import org.apache.hc.client5.http.HttpResponseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.util.Timeout; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; -import org.labkey.api.util.JsonUtil; import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.model.ExperimentAnnotations; import org.labkey.panoramapublic.ncbi.PublicationMatch.PublicationType; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import 
java.text.ParseException; @@ -102,7 +104,7 @@ public static List searchForPublication(@NotNull ExperimentAnn { Logger log = getLog(logger); maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS)); - log.info("Starting publication search for experiment: " + expAnnotations.getId()); + log.info("Starting publication search for experiment: {}", expAnnotations.getId()); // Search PubMed Central first List matchedArticles = searchPmc(expAnnotations, log); @@ -141,7 +143,7 @@ public static List searchForPublication(@NotNull ExperimentAnn if (!StringUtils.isBlank(expAnnotations.getPxid())) { log.debug("Searching PMC by PX ID: {}", expAnnotations.getPxid()); - List ids = searchPmc(quote(expAnnotations.getPxid())); + List ids = searchPmc(quote(expAnnotations.getPxid()), log); if (!ids.isEmpty()) { pmcIdsByStrategy.put(MATCH_PX_ID, ids); @@ -155,7 +157,7 @@ public static List searchForPublication(@NotNull ExperimentAnn { String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); log.debug("Searching PMC by Panorama URL: {}", panoramaUrl); - List ids = searchPmc(quote(panoramaUrl)); + List ids = searchPmc(quote(panoramaUrl), log); if (!ids.isEmpty()) { pmcIdsByStrategy.put(MATCH_PANORAMA_URL, ids); @@ -170,12 +172,12 @@ public static List searchForPublication(@NotNull ExperimentAnn String doiUrl = expAnnotations.getDoi().startsWith("http") ? 
expAnnotations.getDoi() : DataCiteService.toUrl(expAnnotations.getDoi()); - log.debug("Searching PMC by DOI: " + doiUrl); - List ids = searchPmc(quote(doiUrl)); + log.debug("Searching PMC by DOI: {}", doiUrl); + List ids = searchPmc(quote(doiUrl), log); if (!ids.isEmpty()) { pmcIdsByStrategy.put(MATCH_DOI, ids); - log.debug("Found " + ids.size() + " PMC articles by DOI"); + log.debug("Found {} PMC articles by DOI", ids.size()); } rateLimit(); } @@ -194,7 +196,7 @@ public static List searchForPublication(@NotNull ExperimentAnn } } - log.info("Total unique PMC IDs found: " + uniquePmcIds.size()); + log.info("Total unique PMC IDs found: {}", uniquePmcIds.size()); // Fetch and verify PMC articles List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); @@ -206,50 +208,35 @@ public static List searchForPublication(@NotNull ExperimentAnn /** * Search PubMed Central with the given query */ - private static List searchPmc(String query) + private static List searchPmc(String query, Logger log) { - return executeSearch(query, "pmc"); + return executeSearch(query, "pmc", log); } /** * Search PubMed with the given query */ - private static List searchPubMed(String query) + private static List searchPubMed(String query, Logger log) { - return executeSearch(query, "pubmed"); + return executeSearch(query, "pubmed", log); } /** * Execute search using NCBI ESearch API */ - private static List executeSearch(String query, String database) + private static List executeSearch(String query, String database, Logger log) { - HttpURLConnection conn = null; + String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = ESEARCH_URL + + "?db=" + database + + "&term=" + encodedQuery + + "&retmax=" + MAX_RESULTS + + "&retmode=json" + + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); + try { - String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); - String urlString = ESEARCH_URL + - "?db=" + 
database + - "&term=" + encodedQuery + - "&retmax=" + MAX_RESULTS + - "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); - - conn = openGet(urlString); - - int responseCode = conn.getResponseCode(); - if (responseCode != 200) - { - LOG.warn("NCBI ESearch returned non-200 response: " + responseCode); - return Collections.emptyList(); - } - - // Parse JSON response - JSONObject json = readJson(conn); - if (json == null) - { - return Collections.emptyList(); - } + JSONObject json = getJson(url); JSONObject eSearchResult = json.getJSONObject("esearchresult"); JSONArray idList = eSearchResult.getJSONArray("idlist"); @@ -262,36 +249,45 @@ private static List executeSearch(String query, String database) } catch (IOException | JSONException e) { - LOG.error("Error searching " + database + " with query: " + query, e); + log.error("Error searching {} with query: {}", database, query, e); return Collections.emptyList(); } - finally - { - if (conn != null) conn.disconnect(); - } } - private static HttpURLConnection openGet(String urlString) throws IOException - { - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - conn.setConnectTimeout(TIMEOUT_MS); - conn.setReadTimeout(TIMEOUT_MS); - return conn; - } - - private static @Nullable JSONObject readJson(HttpURLConnection conn) + /** + * Execute an HTTP GET request and parse the response as JSON. 
+ * @throws IOException if the request fails or the server returns a non-2xx response + * @throws JSONException if the response body is not valid JSON + */ + private static JSONObject getJson(String url) throws IOException { - try (InputStream in = conn.getInputStream()) - { - String response = IOUtils.toString(in, StandardCharsets.UTF_8); - return new JSONObject(response); - } - catch (IOException | JSONException e) - { - LOG.error("Error reading JSON response", e); - return null; + ConnectionConfig connectionConfig = ConnectionConfig.custom() + .setConnectTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .setSocketTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .build(); + + RequestConfig requestConfig = RequestConfig.custom() + .setResponseTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .build(); + + BasicHttpClientConnectionManager connectionManager = new BasicHttpClientConnectionManager(); + connectionManager.setConnectionConfig(connectionConfig); + + try (CloseableHttpClient client = HttpClientBuilder.create() + .setDefaultRequestConfig(requestConfig) + .setConnectionManager(connectionManager) + .build()) + { + HttpGet get = new HttpGet(url); + return client.execute(get, response -> { + int status = response.getCode(); + if (status < 200 || status >= 300) + { + throw new HttpResponseException(status, response.getReasonPhrase()); + } + String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + return new JSONObject(body); + }); } } @@ -318,30 +314,16 @@ private static Map fetchMetadata(Collection ids, Str { if (ids.isEmpty()) return Collections.emptyMap(); - HttpURLConnection conn = null; + String idString = String.join(",", ids); + String url = ESUMMARY_URL + + "?db=" + database + + "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + + "&retmode=json" + + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); + try { - String idString = String.join(",", ids); - String urlString = ESUMMARY_URL + - "?db=" + database + - 
"&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + - "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); - - conn = openGet(urlString); - - int responseCode = conn.getResponseCode(); - if (responseCode != 200) - { - log.warn("NCBI ESummary returned non-200 response: " + responseCode); - return Collections.emptyMap(); - } - - JSONObject json = readJson(conn); - if (json == null) - { - return Collections.emptyMap(); - } + JSONObject json = getJson(url); JSONObject result = json.optJSONObject("result"); if (result == null) return Collections.emptyMap(); @@ -357,20 +339,11 @@ private static Map fetchMetadata(Collection ids, Str } return metadata; } - catch (IOException e) - { - log.error("Error fetching " + database + " metadata for IDs: " + ids, e); - return Collections.emptyMap(); - } - catch (JSONException e) + catch (IOException | JSONException e) { - log.error("Error fetching " + database + " metadata for IDs: " + ids, e); + log.error("Error fetching {} metadata for IDs: {}", database, ids, e); return Collections.emptyMap(); } - finally - { - if (conn != null) conn.disconnect(); - } } /** @@ -403,7 +376,7 @@ private static List fetchAndVerifyPmcArticles( { String source = articleData.optString("source", ""); String journal = articleData.optString("fulljournalname", ""); - log.info("Excluded preprint PMC" + pmcId + ": source=\"" + source + "\", journal=\"" + journal + "\""); + log.info("Excluded preprint PMC{}: source=\"{}\", journal=\"{}\"", pmcId, source, journal); continue; } @@ -412,7 +385,7 @@ private static List fetchAndVerifyPmcArticles( boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); // Extract PubMed ID, is found - String pubMedId = extractPubMedId(articleData, pmcId); + String pubMedId = extractPubMedId(articleData); String publicationId = pubMedId == null ? pmcId : pubMedId; PublicationType publicationType = pubMedId == null ? 
PublicationType.PMC : PublicationType.PMID; @@ -424,7 +397,7 @@ private static List fetchAndVerifyPmcArticles( boolean foundByUrl = strategies.contains(MATCH_PANORAMA_URL); boolean foundByDoi = strategies.contains(MATCH_DOI); - Date pubDate = parsePublicationDate(articleData); + Date pubDate = parsePublicationDate(articleData, log); PublicationMatch article = new PublicationMatch( publicationId, @@ -438,8 +411,7 @@ private static List fetchAndVerifyPmcArticles( ); articles.add(article); - log.info("PMC" + pmcId + " -> " + publicationType + " " + publicationId + - " | Match info: " + article.getMatchInfo()); + log.info("PMC{} -> {} {} | Match info: {}", pmcId, publicationType, publicationId, article.getMatchInfo()); } return articles; @@ -459,7 +431,7 @@ private static List applyPriorityFiltering(List applyPriorityFiltering(List searchPubMed(ExperimentAnnotations expAnno String query = String.format("%s %s[Author] AND %s NOT preprint[Publication Type]", lastName, firstName, title); - log.debug("PubMed fallback query: " + query); - List pmids = searchPubMed(query); + log.debug("PubMed fallback query: {}", query); + List pmids = searchPubMed(query, log); if (pmids.isEmpty()) { @@ -537,7 +509,7 @@ private static List searchPubMed(ExperimentAnnotations expAnno return Collections.emptyList(); } - log.info("PubMed fallback found " + pmids.size() + " result(s), verifying..."); + log.info("PubMed fallback found {} result(s), verifying...", pmids.size()); // Fetch metadata and verify Map metadata = fetchPubMedMetadata(pmids, log); @@ -551,7 +523,7 @@ private static List searchPubMed(ExperimentAnnotations expAnno // Filter out preprints if (isPreprint(articleData)) { - log.info("Excluded preprint PMID " + pmid); + log.info("Excluded preprint PMID {}", pmid); continue; } @@ -562,7 +534,7 @@ private static List searchPubMed(ExperimentAnnotations expAnno // Only accept if BOTH match if (authorMatch && titleMatch) { - Date pubDate = parsePublicationDate(articleData); + Date 
pubDate = parsePublicationDate(articleData, log); PublicationMatch article = new PublicationMatch( pmid, PublicationType.PMID, @@ -574,11 +546,11 @@ private static List searchPubMed(ExperimentAnnotations expAnno pubDate ); articles.add(article); - log.info("PMID " + pmid + " verified with both Author and Title match"); + log.info("PMID {} verified with both Author and Title match", pmid); } } - log.info("Verified " + articles.size() + " of " + pmids.size() + " PubMed results"); + log.info("Verified {} of {} PubMed results", articles.size(), pmids.size()); return articles; } @@ -611,7 +583,7 @@ private static boolean checkAuthorMatch(JSONObject metadata, String firstName, S } JSONArray authors = metadata.optJSONArray("authors"); - if (authors == null || authors.length() == 0) + if (authors == null || authors.isEmpty()) { return false; } @@ -737,7 +709,7 @@ private static List extractTitleKeywords(String title) /** * Extract PMID from PMC metadata */ - private static @Nullable String extractPubMedId(JSONObject pmcMetadata, String pmcId) + private static @Nullable String extractPubMedId(JSONObject pmcMetadata) { // PMC articles contain PMID in articleids array JSONArray articleIds = pmcMetadata.optJSONArray("articleids"); @@ -760,7 +732,7 @@ private static List extractTitleKeywords(String title) * Parse publication date from ESummary metadata. * Tries "pubdate" first, then "epubdate". Dates are typically in "YYYY Mon DD" or "YYYY Mon" format. 
*/ - private static @Nullable Date parsePublicationDate(JSONObject metadata) + private static @Nullable Date parsePublicationDate(JSONObject metadata, Logger log) { String dateStr = metadata.optString("pubdate", ""); if (StringUtils.isBlank(dateStr)) @@ -784,7 +756,7 @@ private static List extractTitleKeywords(String title) { } } - LOG.debug("Unable to parse publication date: {}", dateStr); + log.debug("Unable to parse publication date: {}", dateStr); return null; } From cb139e09166d315380bd8479ace8aeb86b3079a5 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Tue, 24 Feb 2026 19:43:33 -0800 Subject: [PATCH 09/27] Made constant public --- .../src/org/labkey/panoramapublic/PanoramaPublicController.java | 2 +- .../panoramapublic/ncbi/NcbiPublicationSearchService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index bd676184..de1ae286 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10530,7 +10530,7 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public class RequestDeletionAction extends UpdateDatasetStatusAction + public static class RequestDeletionAction extends UpdateDatasetStatusAction { @Override protected String getConfirmViewTitle() diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 85573ec2..860a5c6a 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -56,7 +56,7 @@ public class 
NcbiPublicationSearchService // API parameters private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec - private static final int MAX_RESULTS = 5; + public static final int MAX_RESULTS = 5; private static final int TIMEOUT_MS = 10000; // 10 seconds private static final String NCBI_EMAIL = "panorama@proteinms.net"; From 324a6040527e3057eda408e510491899e103e870 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Wed, 25 Feb 2026 12:37:53 -0800 Subject: [PATCH 10/27] Added a "Search Publications" link to the TargetedMSExperimentWebPart context menu. --- .../PanoramaPublicController.java | 317 +++++++++++++++--- .../panoramapublic/ncbi/PublicationMatch.java | 35 +- .../TargetedMSExperimentWebPart.java | 1 + 3 files changed, 294 insertions(+), 59 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index de1ae286..9dcaf0ec 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10650,72 +10650,299 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } } - @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public static class FindPublicationForDatasetAction extends ReadOnlyApiAction + @RequiresPermission(AdminOperationsPermission.class) + public static class FindPublicationsForDatasetAction extends SimpleViewAction { + private ExperimentAnnotations _exptAnnotations; + @Override - public Object execute(ShortUrlForm shortUrlForm, BindException errors) throws Exception + public ModelAndView getView(ExperimentIdForm form, BindException errors) { - ApiSimpleResponse response = new ApiSimpleResponse(); - - ExperimentAnnotations exptAnnotations = getValidExperimentAnnotations(shortUrlForm, errors); - if (exptAnnotations == null) + _exptAnnotations = 
getValidExperiment(form, getContainer(), getViewContext(), errors); + if (_exptAnnotations == null) { - errors.reject(ERROR_MSG, "Unable to find experiment for the provided short URL"); - return null; + return new SimpleErrorView(errors); } + List matches; try { - // Perform the publication search - PublicationMatch articleMatch = - NcbiPublicationSearchService.searchForPublication(exptAnnotations, null); + matches = NcbiPublicationSearchService.searchForPublication(_exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null); + } + catch (Exception e) + { + LOG.error("Error searching for publications for experiment " + _exptAnnotations.getId(), e); + errors.reject(ERROR_MSG, "Error searching for publications: " + e.getMessage()); + return new SimpleErrorView(errors); + } - response.put("success", true); - response.put("publicationFound", articleMatch != null); + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(_exptAnnotations); - if (articleMatch != null) - { - response.put("publicationId", articleMatch.getPublicationId()); - response.put("publicationType", articleMatch.getPublicationType().name()); - response.put("matchInfo", articleMatch.getMatchInfo()); - response.put("publicationUrl", articleMatch.getPublicationUrl()); + VBox view = new VBox(); + view.setFrame(WebPartView.FrameType.PORTAL); + view.setTitle("Find Publications for Dataset"); - // Update DatasetStatus with the search result - DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); - if (datasetStatus == null) + // Dataset info section + List infoRows = new ArrayList<>(); + ShortURLRecord shortUrl = _exptAnnotations.getShortUrl(); + if (shortUrl != null) + { + infoRows.add(row("Title:", LinkBuilder.simpleLink(_exptAnnotations.getTitle(), shortUrl.renderShortURL()))); + } + else + { + infoRows.add(row("Title:", _exptAnnotations.getTitle())); + } + infoRows.add(row("Created:", DateUtil.formatDateTime(_exptAnnotations.getCreated(), 
"yyyy-MM-dd"))); + String submitterName = _exptAnnotations.getSubmitterName(); + infoRows.add(row("Submitter:", submitterName != null ? submitterName : "Unknown")); + if (_exptAnnotations.hasPxid()) + { + infoRows.add(row("PX ID:", LinkBuilder.simpleLink(_exptAnnotations.getPxid(), + "https://proteomecentral.proteomexchange.org/cgi/GetDataset?ID=" + PageFlowUtil.encode(_exptAnnotations.getPxid())))); + } + if (_exptAnnotations.hasDoi()) + { + infoRows.add(row("DOI:", _exptAnnotations.getDoi())); + } + view.addView(new HtmlView(TABLE(cl("lk-fields-table"), infoRows))); + + // Publications section + if (!matches.isEmpty()) + { + // Check if any displayed match was dismissed by the user + boolean showDismissedColumn = false; + String dismissedPubId = null; + if (datasetStatus != null && Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication()) + && datasetStatus.getPotentialPublicationId() != null) + { + dismissedPubId = datasetStatus.getPotentialPublicationId(); + for (PublicationMatch match : matches) { - datasetStatus = new DatasetStatus(); - datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); - datasetStatus.setPotentialPublicationId(articleMatch.getPublicationId()); - datasetStatus.setPublicationType(articleMatch.getPublicationType().name()); - datasetStatus.setPublicationMatchInfo(articleMatch.getMatchInfo()); - DatasetStatusManager.save(datasetStatus, getUser()); + if (match.getPublicationId().equals(dismissedPubId)) + { + showDismissedColumn = true; + break; + } } - else + } + + ActionURL postUrl = new ActionURL(NotifySubmitterOfPublicationsAction.class, getContainer()); + List formContents = new ArrayList<>(); + formContents.add(INPUT(at(type, "hidden", name, "id", value, _exptAnnotations.getId()))); + formContents.add(INPUT(at(type, "hidden", name, "publicationType", value, ""))); + formContents.add(INPUT(at(type, "hidden", name, "matchInfo", value, ""))); + + // Table header + List headerCells = new ArrayList<>(); + 
headerCells.add(TH(cl("labkey-col-header"), "Select")); + headerCells.add(TH(cl("labkey-col-header"), "Publication ID")); + headerCells.add(TH(cl("labkey-col-header"), "Matches")); + if (showDismissedColumn) + { + headerCells.add(TH(cl("labkey-col-header"), "User Dismissed")); + } + DOM.Renderable headerRow = TR(headerCells); + + // Table rows + List tableRows = new ArrayList<>(); + tableRows.add(headerRow); + String finalDismissedPubId = dismissedPubId; + boolean finalShowDismissedColumn = showDismissedColumn; + String rowCls; + int rowIdx = 0; + for (PublicationMatch match : matches) + { + String pubId = match.getPublicationId(); + List cells = new ArrayList<>(); + cells.add(TD(INPUT(at(type, "radio", name, "publicationId", value, pubId) + .data("publicationType", match.getPublicationType().name()) + .data("matchInfo", match.getMatchInfo())))); + cells.add(TD(LinkBuilder.simpleLink(match.getPublicationLabel(), match.getPublicationUrl()))); + cells.add(TD(match.getMatchInfo())); + + if (finalShowDismissedColumn) { - datasetStatus.setPotentialPublicationId(articleMatch.getPublicationId()); - datasetStatus.setPublicationType(articleMatch.getPublicationType().name()); - datasetStatus.setPublicationMatchInfo(articleMatch.getMatchInfo()); - datasetStatus.setUserDismissedPublication(false); // Reset dismissal flag - DatasetStatusManager.update(datasetStatus, getUser()); + cells.add(TD(pubId.equals(finalDismissedPubId) ? "Yes" : "")); } - response.put("message", "Publication found: " + articleMatch.getPublicationLabel()); - } - else - { - response.put("message", "No publication found for this dataset"); + rowCls = (rowIdx++) % 2 == 0 ? 
"labkey-alternate-row" : "labkey-row"; + tableRows.add(TR(cl(rowCls), cells)); } + + formContents.add(TABLE(cl("labkey-data-region labkey-show-borders table-bordered table-condensed"), tableRows)); + formContents.add(BR()); + formContents.add(new ButtonBuilder("Notify Submitter").submit(true).build()); + + view.addView(new HtmlView(FORM(at(method, "POST", action, postUrl), formContents))); + view.addView(new HtmlView(SCRIPT(HtmlString.unsafe( + "document.querySelectorAll('input[name=\"publicationId\"]').forEach(function(radio) {\n" + + " radio.addEventListener('change', function() {\n" + + " var form = this.closest('form');\n" + + " form.querySelector('input[name=\"publicationType\"]').value = this.dataset.publicationtype;\n" + + " form.querySelector('input[name=\"matchInfo\"]').value = this.dataset.matchinfo;\n" + + " });\n" + + "});" + )))); } - catch (Exception e) + else { - LOG.error("Error searching for publication for experiment " + exptAnnotations.getId(), e); - errors.reject(ERROR_MSG, "Error searching for publication: " + e.getMessage()); - return null; + view.addView(new HtmlView(DIV(at(style, "margin-top:10px;"), "No publications found for this dataset."))); } - return response; + return view; + } + + @Override + public void addNavTrail(NavTree root) + { + root.addChild("Publication Matches for Dataset"); + } + } + + public static class NotifySubmitterForm extends IdForm + { + private String _publicationId; + private String _publicationType; + private String _matchInfo; + + public String getPublicationId() + { + return _publicationId; + } + + public void setPublicationId(String publicationId) + { + _publicationId = publicationId; + } + + public String getPublicationType() + { + return _publicationType; + } + + public void setPublicationType(String publicationType) + { + _publicationType = publicationType; + } + + public String getMatchInfo() + { + return _matchInfo; + } + + public void setMatchInfo(String matchInfo) + { + _matchInfo = matchInfo; + } + + 
} + + @RequiresPermission(AdminOperationsPermission.class) + public static class NotifySubmitterOfPublicationsAction extends FormHandlerAction + { + @Override + public void validateCommand(NotifySubmitterForm form, Errors errors) + { + if (form.getId() <= 0) + { + errors.reject(ERROR_MSG, "Missing experiment annotations ID"); + } + if (StringUtils.isBlank(form.getPublicationId())) + { + errors.reject(ERROR_MSG, "No publication was selected"); + } + } + + @Override + public boolean handlePost(NotifySubmitterForm form, BindException errors) throws Exception + { + ExperimentAnnotations exptAnnotations = ExperimentAnnotationsManager.get(form.getId()); + if (exptAnnotations == null) + { + errors.reject(ERROR_MSG, "No experiment found for Id " + form.getId()); + return false; + } + + PublicationMatch.PublicationType pubType = PublicationMatch.PublicationType.fromString(form.getPublicationType()); + if (pubType == null) + { + errors.reject(ERROR_MSG, "Invalid publication type: " + form.getPublicationType()); + return false; + } + + // Reconstruct PublicationMatch from form fields + PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, form.getMatchInfo()); + + // Post notification + JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(exptAnnotations); + if (submission == null) + { + errors.reject(ERROR_MSG, "No journal submission found for experiment " + exptAnnotations.getId()); + return false; + } + Journal journal = JournalManager.getJournal(submission.getJournalId()); + if (journal == null) + { + errors.reject(ERROR_MSG, "No journal found for submission"); + return false; + } + + Container announcementsContainer = journal.getSupportContainer(); + Announcement announcement = submission.getAnnouncement(AnnouncementService.get(), announcementsContainer, getUser()); + if (announcement == null) + { + errors.reject(ERROR_MSG, "No support thread found for this submission"); + return false; + } + + User 
submitter = exptAnnotations.getSubmitterUser(); + if (submitter == null) + { + errors.reject(ERROR_MSG, "Could not find submitter for experiment " + exptAnnotations.getId()); + return false; + } + + List notifyUsers = new ArrayList<>(); + notifyUsers.add(submitter); + if (exptAnnotations.getLabHeadUser() != null) + { + notifyUsers.add(exptAnnotations.getLabHeadUser()); + } + + PanoramaPublicNotification.postPrivateDataReminderMessage( + journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, + announcement, announcementsContainer, getUser(), selectedMatch); + + // Update DatasetStatus + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); + if (datasetStatus == null) + { + datasetStatus = new DatasetStatus(); + datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); + } + datasetStatus.setPotentialPublicationId(form.getPublicationId()); + datasetStatus.setPublicationType(pubType.name()); + datasetStatus.setPublicationMatchInfo(form.getMatchInfo()); + datasetStatus.setUserDismissedPublication(false); + datasetStatus.setLastReminderDate(new Date()); + + if (datasetStatus.getId() == 0) + { + DatasetStatusManager.save(datasetStatus, getUser()); + } + else + { + DatasetStatusManager.update(datasetStatus, getUser()); + } + + return true; + } + + @Override + public ActionURL getSuccessURL(NotifySubmitterForm form) + { + return new ActionURL(FindPublicationsForDatasetAction.class, getContainer()).addParameter("id", form.getId()); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index ede43124..2b4f2209 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -145,20 +145,11 @@ public String getPublicationLabel() } /** - * Build a PublicationMatch from a persisted DatasetStatus. 
+ * Build a PublicationMatch from a publication ID, type, and match info string. + * @see #getMatchInfo() */ - public static @Nullable PublicationMatch fromDatasetStatus(@NotNull DatasetStatus datasetStatus) + public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull PublicationType publicationType, @Nullable String matchInfo) { - if (StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) - { - return null; - } - PublicationType type = PublicationType.fromString(datasetStatus.getPublicationType()); - if (type == null) - { - return null; - } - String matchInfo = datasetStatus.getPublicationMatchInfo(); boolean pxId = false, url = false, doi = false, author = false, title = false; if (!StringUtils.isBlank(matchInfo)) { @@ -175,7 +166,23 @@ public String getPublicationLabel() } } } - return new PublicationMatch(datasetStatus.getPotentialPublicationId(), type, - pxId, url, doi, author, title, null); + return new PublicationMatch(publicationId, publicationType, pxId, url, doi, author, title, null); + } + + /** + * Build a PublicationMatch from a persisted DatasetStatus. 
+ */ + public static @Nullable PublicationMatch fromDatasetStatus(@NotNull DatasetStatus datasetStatus) + { + if (StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + { + return null; + } + PublicationType type = PublicationType.fromString(datasetStatus.getPublicationType()); + if (type == null) + { + return null; + } + return fromMatchInfo(datasetStatus.getPotentialPublicationId(), type, datasetStatus.getPublicationMatchInfo()); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java b/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java index b0cb1624..b8452971 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java +++ b/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java @@ -86,6 +86,7 @@ else if(expAnnotations.getContainer().equals(container)) navTree.addChild("DOI", new ActionURL(PanoramaPublicController.DoiOptionsAction.class, container).addParameter("id", expAnnotations.getId())); navTree.addChild("Make Data Public", new ActionURL(PanoramaPublicController.MakePublicAction.class, container).addParameter("id", expAnnotations.getId())); navTree.addChild("Post to Bluesky", new ActionURL(PanoramaPublicController.PostToBlueskyOptionsAction.class, container).addParameter("id", expAnnotations.getId())); + navTree.addChild("Search Publications", new ActionURL(PanoramaPublicController.FindPublicationsForDatasetAction.class, container).addParameter("id", expAnnotations.getId())); JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(expAnnotations); if (submission != null) From fd64b392be3635f0732baca3fceee4eee135500c Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 26 Feb 2026 13:46:01 -0800 Subject: [PATCH 11/27] Renamed action classes. 
Added SearchPublicationsAction and SearchPublicationsForDatasetApiAction --- .../PanoramaPublicController.java | 151 ++++++++++-- .../message/PrivateDataReminderSettings.java | 26 +- .../ncbi/NcbiPublicationSearchService.java | 2 +- .../pipeline/PrivateDataReminderJob.java | 2 +- .../TargetedMSExperimentWebPart.java | 2 +- .../view/privateDataRemindersSettingsForm.jsp | 22 +- .../view/searchPublicationsForm.jsp | 233 ++++++++++++++++++ 7 files changed, 399 insertions(+), 39 deletions(-) create mode 100644 panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 9dcaf0ec..1fc1faec 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -236,6 +236,7 @@ import java.util.Date; import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.List; import java.util.Objects; import java.util.Set; @@ -10137,7 +10138,7 @@ public ModelAndView getView(PrivateDataReminderSettingsForm form, boolean reshow form.setDelayUntilFirstReminder(settings.getDelayUntilFirstReminder()); form.setReminderFrequency(settings.getReminderFrequency()); form.setExtensionLength(settings.getExtensionLength()); - form.setEnablePublicationCheck(settings.isEnablePublicationCheck()); + form.setEnablePublicationSearch(settings.isEnablePublicationSearch()); } VBox view = new VBox(); @@ -10156,7 +10157,7 @@ public boolean handlePost(PrivateDataReminderSettingsForm form, BindException er settings.setDelayUntilFirstReminder(form.getDelayUntilFirstReminder()); settings.setReminderFrequency(form.getReminderFrequency()); settings.setExtensionLength(form.getExtensionLength()); - settings.setEnablePublicationCheck(form.isEnablePublicationCheck()); + 
settings.setEnablePublicationSearch(form.isEnablePublicationSearch()); PrivateDataReminderSettings.save(settings); PrivateDataMessageScheduler.getInstance().initialize(settings.isEnableReminders()); @@ -10194,7 +10195,7 @@ public static class PrivateDataReminderSettingsForm private Integer _extensionLength; private Integer _reminderFrequency; private Integer _delayUntilFirstReminder; - private boolean _enablePublicationCheck; + private boolean _enablePublicationSearch; public boolean isEnabled() { @@ -10246,14 +10247,14 @@ public void setDelayUntilFirstReminder(Integer delayUntilFirstReminder) _delayUntilFirstReminder = delayUntilFirstReminder; } - public boolean isEnablePublicationCheck() + public boolean isEnablePublicationSearch() { - return _enablePublicationCheck; + return _enablePublicationSearch; } - public void setEnablePublicationCheck(boolean enablePublicationCheck) + public void setEnablePublicationSearch(boolean enablePublicationSearch) { - _enablePublicationCheck = enablePublicationCheck; + _enablePublicationSearch = enablePublicationSearch; } } @@ -10277,7 +10278,7 @@ public ModelAndView getView(PrivateDataSendReminderForm form, boolean reshow, Bi if (!reshow) { PrivateDataReminderSettings settings = PrivateDataReminderSettings.get(); - form.setCheckPublications(settings.isEnablePublicationCheck()); + form.setCheckPublications(settings.isEnablePublicationSearch()); } QuerySettings qSettings = new QuerySettings(getViewContext(), PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, @@ -10390,6 +10391,65 @@ public void setDataRegionName(String dataRegionName) } } + // region Search Publications + + @RequiresPermission(AdminOperationsPermission.class) + public static class SearchPublicationsAction extends SimpleViewAction + { + @Override + public ModelAndView getView(Object form, BindException errors) + { + Journal panoramaPublic = JournalManager.getJournal(getContainer()); + if (panoramaPublic == null) + { + errors.reject(ERROR_MSG, "Not a Panorama Public 
folder: " + getContainer().getName()); + return new SimpleErrorView(errors, true); + } + + QuerySettings qSettings = new QuerySettings(getViewContext(), PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, + PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); + qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); + qSettings.setBaseFilter(new SimpleFilter(FieldKey.fromParts("Public"), "No")); + + QueryView tableView = new QueryView(new PanoramaPublicSchema(getUser(), getContainer()), qSettings, null); + tableView.setTitle("Private Datasets"); + tableView.setFrame(WebPartView.FrameType.NONE); + tableView.disableContainerFilterSelection(); + + SearchPublicationsBean bean = new SearchPublicationsBean(); + bean.setDataRegionName(tableView.getDataRegionName()); + + JspView jspView = new JspView<>("/org/labkey/panoramapublic/view/searchPublicationsForm.jsp", bean, errors); + VBox view = new VBox(jspView, tableView); + view.setTitle("Search Publications"); + view.setFrame(WebPartView.FrameType.PORTAL); + return view; + } + + @Override + public void addNavTrail(NavTree root) + { + root.addChild("Private Data Reminder Settings", new ActionURL(PrivateDataReminderSettingsAction.class, ContainerManager.getRoot())); + root.addChild("Search Publications"); + } + } + + public static class SearchPublicationsBean + { + private String _dataRegionName; + + public String getDataRegionName() + { + return _dataRegionName; + } + + public void setDataRegionName(String dataRegionName) + { + _dataRegionName = dataRegionName; + } + } + // endregion Search Publications + @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) public abstract static class UpdateDatasetStatusAction extends ConfirmAction { @@ -10402,11 +10462,13 @@ public abstract static class UpdateDatasetStatusAction extends ConfirmAction + public static class SearchPublicationsForDatasetAction extends SimpleViewAction { private ExperimentAnnotations _exptAnnotations; @@ 
-10800,6 +10860,51 @@ public void addNavTrail(NavTree root) } } + @RequiresPermission(AdminOperationsPermission.class) + public static class SearchPublicationsForDatasetApiAction extends ReadOnlyApiAction + { + @Override + public Object execute(ExperimentIdForm form, BindException errors) + { + ApiSimpleResponse response = new ApiSimpleResponse(); + ExperimentAnnotations exptAnnotations = form.lookupExperiment(); + if (exptAnnotations == null) + { + response.put("success", false); + response.put("error", "No experiment found for Id " + form.getId()); + return response; + } + + try + { + List matches = NcbiPublicationSearchService.searchForPublication(exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null); + response.put("success", true); + response.put("papersFound", matches.size()); + + List> matchList = new ArrayList<>(); + for (PublicationMatch match : matches) + { + Map matchMap = new HashMap<>(); + matchMap.put("publicationId", match.getPublicationId()); + matchMap.put("publicationType", match.getPublicationType().name()); + matchMap.put("publicationLabel", match.getPublicationLabel()); + matchMap.put("publicationUrl", match.getPublicationUrl()); + matchMap.put("matchInfo", match.getMatchInfo()); + matchList.add(matchMap); + } + response.put("matches", matchList); + } + catch (Exception e) + { + LOG.error("Error searching for publications for experiment " + form.getId(), e); + response.put("success", false); + response.put("error", "Error searching for publications: " + e.getMessage()); + } + + return response; + } + } + public static class NotifySubmitterForm extends IdForm { private String _publicationId; @@ -10841,6 +10946,9 @@ public void setMatchInfo(String matchInfo) @RequiresPermission(AdminOperationsPermission.class) public static class NotifySubmitterOfPublicationsAction extends FormHandlerAction { + private Container _announcementsContainer; + private Announcement _announcement; + @Override public void validateCommand(NotifySubmitterForm 
form, Errors errors) { @@ -10888,9 +10996,9 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws return false; } - Container announcementsContainer = journal.getSupportContainer(); - Announcement announcement = submission.getAnnouncement(AnnouncementService.get(), announcementsContainer, getUser()); - if (announcement == null) + _announcementsContainer = journal.getSupportContainer(); + _announcement = submission.getAnnouncement(AnnouncementService.get(), _announcementsContainer, getUser()); + if (_announcement == null) { errors.reject(ERROR_MSG, "No support thread found for this submission"); return false; @@ -10912,7 +11020,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws PanoramaPublicNotification.postPrivateDataReminderMessage( journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, - announcement, announcementsContainer, getUser(), selectedMatch); + _announcement, _announcementsContainer, getUser(), selectedMatch); // Update DatasetStatus DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); @@ -10942,7 +11050,8 @@ journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, @Override public ActionURL getSuccessURL(NotifySubmitterForm form) { - return new ActionURL(FindPublicationsForDatasetAction.class, getContainer()).addParameter("id", form.getId()); + return new ActionURL("announcements", "thread", _announcementsContainer) + .addParameter("rowId", _announcement.getRowId()); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java index b4cc557e..124e8398 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java +++ b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java @@ -24,14 +24,14 @@ public class 
PrivateDataReminderSettings public static final String PROP_DELAY_UNTIL_FIRST_REMINDER = "Delay until first reminder (months)"; public static final String PROP_REMINDER_FREQUENCY = "Reminder frequency (months)"; public static final String PROP_EXTENSION_LENGTH = "Extension duration (months)"; - public static final String PROP_ENABLE_PUBLICATION_CHECK = "Enable NCBI publication check"; + public static final String PROP_ENABLE_PUBLICATION_SEARCH = "Enable publication search"; private static final boolean DEFAULT_ENABLE_REMINDERS = false; public static final String DEFAULT_REMINDER_TIME = "8:00 AM"; private static final int DEFAULT_DELAY_UNTIL_FIRST_REMINDER = 12; // Send the first reminder after the data has been private for a year. private static final int DEFAULT_REMINDER_FREQUENCY = 1; // Send reminders once a month, unless extension or deletion was requested. private static final int DEFAULT_EXTENSION_LENGTH = 6; // Private status of a dataset can be extended by 6 months. - private static final boolean DEFAULT_ENABLE_PUBLICATION_CHECK = false; + private static final boolean DEFAULT_ENABLE_PUBLICATION_SEARCH = false; public static final String DATE_FORMAT_PATTERN = "MMMM d, yyyy"; public static final String REMINDER_TIME_FORMAT = "h:mm a"; @@ -42,7 +42,7 @@ public class PrivateDataReminderSettings private int _delayUntilFirstReminder; private int _reminderFrequency; private int _extensionLength; - private boolean _enablePublicationCheck; + private boolean _enablePublicationSearch; public static PrivateDataReminderSettings get() { @@ -74,10 +74,10 @@ public static PrivateDataReminderSettings get() LocalTime reminderTime = tryParseReminderTime(settingsMap.get(PROP_REMINDER_TIME), DEFAULT_REMINDER_TIME); settings.setReminderTime(reminderTime); - boolean enablePublicationCheck = settingsMap.get(PROP_ENABLE_PUBLICATION_CHECK) == null - ? 
DEFAULT_ENABLE_PUBLICATION_CHECK - : Boolean.valueOf(settingsMap.get(PROP_ENABLE_PUBLICATION_CHECK)); - settings.setEnablePublicationCheck(enablePublicationCheck); + boolean enablePublicationCheck = settingsMap.get(PROP_ENABLE_PUBLICATION_SEARCH) == null + ? DEFAULT_ENABLE_PUBLICATION_SEARCH + : Boolean.valueOf(settingsMap.get(PROP_ENABLE_PUBLICATION_SEARCH)); + settings.setEnablePublicationSearch(enablePublicationCheck); } else { @@ -86,7 +86,7 @@ public static PrivateDataReminderSettings get() settings.setReminderFrequency(DEFAULT_REMINDER_FREQUENCY); settings.setExtensionLength(DEFAULT_EXTENSION_LENGTH); settings.setReminderTime(parseReminderTime(DEFAULT_REMINDER_TIME)); - settings.setEnablePublicationCheck(DEFAULT_ENABLE_PUBLICATION_CHECK); + settings.setEnablePublicationSearch(DEFAULT_ENABLE_PUBLICATION_SEARCH); } return settings; @@ -121,7 +121,7 @@ public static void save(PrivateDataReminderSettings settings) settingsMap.put(PROP_REMINDER_FREQUENCY, String.valueOf(settings.getReminderFrequency())); settingsMap.put(PROP_EXTENSION_LENGTH, String.valueOf(settings.getExtensionLength())); settingsMap.put(PROP_REMINDER_TIME, settings.getReminderTimeFormatted()); - settingsMap.put(PROP_ENABLE_PUBLICATION_CHECK, String.valueOf(settings.isEnablePublicationCheck())); + settingsMap.put(PROP_ENABLE_PUBLICATION_SEARCH, String.valueOf(settings.isEnablePublicationSearch())); settingsMap.save(); } @@ -180,14 +180,14 @@ public void setDelayUntilFirstReminder(int delayUntilFirstReminder) _delayUntilFirstReminder = delayUntilFirstReminder; } - public boolean isEnablePublicationCheck() + public boolean isEnablePublicationSearch() { - return _enablePublicationCheck; + return _enablePublicationSearch; } - public void setEnablePublicationCheck(boolean enablePublicationCheck) + public void setEnablePublicationSearch(boolean enablePublicationSearch) { - _enablePublicationCheck = enablePublicationCheck; + _enablePublicationSearch = enablePublicationSearch; } public @Nullable Date 
getReminderValidUntilDate(@NotNull DatasetStatus status) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 860a5c6a..71e5669a 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -127,7 +127,7 @@ public static List searchForPublication(@NotNull ExperimentAnn { matchedArticles = matchedArticles.subList(0, maxResults); } - log.info("Returning {}{}", matchedArticles.size(), StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); + log.info("Returning {}", StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); return matchedArticles; } diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index c45bbaf4..86fb247f 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -190,7 +190,7 @@ private static PublicationMatch checkForPublication(@NotNull ExperimentAnnotatio @NotNull Logger log) { // Check if publication checking is enabled (either globally or forced for this run) - if (!forceCheck && !settings.isEnablePublicationCheck()) + if (!forceCheck && !settings.isEnablePublicationSearch()) { log.debug("Publication checking is disabled in settings"); return null; diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java b/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java index b8452971..d0dc81e8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java @@ -86,7 +86,7 @@ else if(expAnnotations.getContainer().equals(container)) navTree.addChild("DOI", new ActionURL(PanoramaPublicController.DoiOptionsAction.class, container).addParameter("id", expAnnotations.getId())); navTree.addChild("Make Data Public", new ActionURL(PanoramaPublicController.MakePublicAction.class, container).addParameter("id", expAnnotations.getId())); navTree.addChild("Post to Bluesky", new ActionURL(PanoramaPublicController.PostToBlueskyOptionsAction.class, container).addParameter("id", expAnnotations.getId())); - navTree.addChild("Search Publications", new ActionURL(PanoramaPublicController.FindPublicationsForDatasetAction.class, container).addParameter("id", expAnnotations.getId())); + navTree.addChild("Search Publications", new ActionURL(PanoramaPublicController.SearchPublicationsForDatasetAction.class, container).addParameter("id", expAnnotations.getId())); JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(expAnnotations); if (submission != null) diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp index a3d2873c..1567f026 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp @@ -64,6 +64,23 @@ } window.location = LABKEY.ActionURL.buildURL("panoramapublic", "sendPrivateDataReminders.view", folderPath); } + + function clickSearchPublicationsLink() + { + const journal = document.getElementById("journal"); + if (!journal) + { + alert("Cannot get journal selector element."); + return; + } + const folderPath = journal.value; + if (!folderPath) + { + alert("Please select a Panorama Public folder first."); + return; + } + window.location = 
LABKEY.ActionURL.buildURL("panoramapublic", "searchPublications.view", folderPath); + } @@ -129,10 +146,10 @@ - <%=h(PrivateDataReminderSettings.PROP_ENABLE_PUBLICATION_CHECK)%> + <%=h(PrivateDataReminderSettings.PROP_ENABLE_PUBLICATION_SEARCH)%> - /> + />
When enabled, the system will search PubMed Central and PubMed for publications associated with private datasets.
@@ -161,6 +178,7 @@ %> <%=link("Send Reminders Now").onClick("clickSendRemindersLink();").build()%> + <%=link("Search Publications").onClick("clickSearchPublicationsLink();").build()%>
diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp new file mode 100644 index 00000000..9302f251 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp @@ -0,0 +1,233 @@ +<%@ taglib prefix="labkey" uri="http://www.labkey.org/taglib" %> +<%@ page import="org.labkey.api.view.HttpView" %> +<%@ page import="org.labkey.api.view.JspView" %> +<%@ page import="org.labkey.panoramapublic.PanoramaPublicController" %> +<%@ page import="org.labkey.api.view.ActionURL" %> +<%@ page extends="org.labkey.api.jsp.JspBase" %> + +<% + JspView view = HttpView.currentView(); + var form = view.getModelBean(); + ActionURL searchPublicationsUrl = new ActionURL(PanoramaPublicController.SearchPublicationsForDatasetAction.class, getContainer()); + ActionURL searchPublicationsApiUrl = new ActionURL(PanoramaPublicController.SearchPublicationsForDatasetApiAction.class, getContainer()); +%> + + + + + + + +
+
+ Select datasets below, then click "Search Publications" to search PubMed and PMC for associated publications. +
+
+ <%=button("Search Publications").id("search-pub-btn").onClick("searchPublications();")%> +
+
+ +
From 6e2b487f738ba5952ad48a6fd44952ecd94030a3 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 26 Feb 2026 15:09:22 -0800 Subject: [PATCH 12/27] Cleanup searchPublicationsForm.jsp --- .../PanoramaPublicController.java | 48 ++++--- .../view/searchPublicationsForm.jsp | 129 ++++++++++-------- .../view/sendPrivateDataRemindersForm.jsp | 4 +- 3 files changed, 99 insertions(+), 82 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 1fc1faec..5596b2a2 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10278,18 +10278,10 @@ public ModelAndView getView(PrivateDataSendReminderForm form, boolean reshow, Bi if (!reshow) { PrivateDataReminderSettings settings = PrivateDataReminderSettings.get(); - form.setCheckPublications(settings.isEnablePublicationSearch()); + form.setSearchPublications(settings.isEnablePublicationSearch()); } - QuerySettings qSettings = new QuerySettings(getViewContext(), PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, - PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); - qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); - qSettings.setBaseFilter(new SimpleFilter(FieldKey.fromParts("Public"), "No")); - - QueryView tableView = new QueryView(new PanoramaPublicSchema(getUser(), getContainer()), qSettings, null); - tableView.setTitle("Private Panorama Private Datasets"); - tableView.setFrame(WebPartView.FrameType.NONE); - tableView.disableContainerFilterSelection(); + QueryView tableView = getPrivateExperimentsQueryView(getViewContext(), getContainer(), getUser()); form.setDataRegionName(tableView.getDataRegionName()); @@ -10314,7 +10306,7 @@ public boolean handlePost(PrivateDataSendReminderForm form, BindException errors JournalManager.getJournal(getContainer()), 
form.getSelectedExperimentIds(), form.getTestMode(), - form.isCheckPublications()); + form.isSearchPublications()); PipelineService.get().queueJob(job); return true; } @@ -10334,10 +10326,24 @@ public void addNavTrail(NavTree root) } } + private static @NotNull QueryView getPrivateExperimentsQueryView(ViewContext viewContext, Container container, User user) + { + QuerySettings qSettings = new QuerySettings(viewContext, PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, + PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); + qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); + qSettings.setBaseFilter(new SimpleFilter(FieldKey.fromParts("Public"), "No")); + + QueryView tableView = new QueryView(new PanoramaPublicSchema(user, container), qSettings, null); + tableView.setTitle("Private Panorama Public Datasets"); + tableView.setFrame(WebPartView.FrameType.NONE); + tableView.disableContainerFilterSelection(); + return tableView; + } + public static class PrivateDataSendReminderForm { private boolean _testMode; - private boolean _checkPublications; + private boolean _searchPublications; private String _selectedIds; private String _dataRegionName = null; @@ -10351,14 +10357,14 @@ public void setTestMode(boolean testMode) _testMode = testMode; } - public boolean isCheckPublications() + public boolean isSearchPublications() { - return _checkPublications; + return _searchPublications; } - public void setCheckPublications(boolean checkPublications) + public void setSearchPublications(boolean searchPublications) { - _checkPublications = checkPublications; + _searchPublications = searchPublications; } public String getSelectedIds() @@ -10406,15 +10412,7 @@ public ModelAndView getView(Object form, BindException errors) return new SimpleErrorView(errors, true); } - QuerySettings qSettings = new QuerySettings(getViewContext(), PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, - PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); - 
qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); - qSettings.setBaseFilter(new SimpleFilter(FieldKey.fromParts("Public"), "No")); - - QueryView tableView = new QueryView(new PanoramaPublicSchema(getUser(), getContainer()), qSettings, null); - tableView.setTitle("Private Datasets"); - tableView.setFrame(WebPartView.FrameType.NONE); - tableView.disableContainerFilterSelection(); + QueryView tableView = getPrivateExperimentsQueryView(getViewContext(), getContainer(), getUser()); SearchPublicationsBean bean = new SearchPublicationsBean(); bean.setDataRegionName(tableView.getDataRegionName()); diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp index 9302f251..540d8918 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp @@ -8,12 +8,10 @@ <% JspView view = HttpView.currentView(); var form = view.getModelBean(); - ActionURL searchPublicationsUrl = new ActionURL(PanoramaPublicController.SearchPublicationsForDatasetAction.class, getContainer()); - ActionURL searchPublicationsApiUrl = new ActionURL(PanoramaPublicController.SearchPublicationsForDatasetApiAction.class, getContainer()); %> + + + + + + + + + + + + + + + + + <% if (experiment.hasPxid()) { %> + + + + + <% } %> + <% if (experiment.hasDoi()) { %> + + + + + <% } %> +
Title: + <% if (shortUrl != null) { %> + <%=h(experiment.getTitle())%> + <% } else { %> + <%=h(experiment.getTitle())%> + <% } %> +
Created:<%=h(DateUtil.formatDateTime(experiment.getCreated(), "yyyy-MM-dd"))%>
Submitter:<%=h(submitterName != null ? submitterName : "Unknown")%>
PX ID:<%=simpleLink(experiment.getPxid(), ProteomeXchangeService.toUrl(experiment.getPxid()))%>
DOI:<%=h(experiment.getDoi())%>
+ +<% if (!bean.getMatches().isEmpty()) { %> + + + + + + + + + + + + <% if (bean.isShowDismissedColumn()) { %> + + <% } %> + + <% + int rowIdx = 0; + for (PublicationMatch match : bean.getMatches()) + { + String rowCls = (rowIdx++) % 2 == 0 ? "labkey-alternate-row" : "labkey-row"; + String pubId = match.getPublicationId(); + %> + + + + + <% if (bean.isShowDismissedColumn()) { %> + + <% } %> + + <% } %> +
SelectPublication IDMatchesUser Dismissed
+ + <%=h(match.getPublicationLabel())%><%=h(match.getMatchInfo())%><%=h(pubId.equals(bean.getDismissedPubId()) ? "Yes" : "")%>
+
+ <%=button("Notify Submitter").submit(true)%> +
+ +<% } else { %> + +
No publications found for this dataset.
+ +<% } %> From 19598acc04994bc3315a7c62926f02002774f4c6 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Fri, 27 Feb 2026 15:39:53 -0800 Subject: [PATCH 14/27] - Extended NcbiUtils to fetch citations from PubMed Central - Display the citation in the notification message as well as the result page when querying publications for a single dataset. --- .../PanoramaPublicController.java | 10 ++- .../PanoramaPublicNotification.java | 5 +- .../panoramapublic/model/DatasetStatus.java | 4 +- .../ncbi/NcbiPublicationSearchService.java | 21 +++-- .../panoramapublic/ncbi/PublicationMatch.java | 64 +++++++--------- .../pipeline/PrivateDataReminderJob.java | 11 +-- .../proteomexchange/NcbiUtils.java | 76 +++++++++++++++---- .../view/searchPublicationsForDataset.jsp | 7 +- 8 files changed, 125 insertions(+), 73 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index f8964ad2..941b952d 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -182,6 +182,7 @@ import org.labkey.panoramapublic.proteomexchange.ExperimentModificationGetter; import org.labkey.panoramapublic.proteomexchange.Formula; import org.labkey.panoramapublic.proteomexchange.NcbiUtils; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils.DB; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeServiceException; import org.labkey.panoramapublic.proteomexchange.PsiInstrumentParser; @@ -7047,7 +7048,7 @@ public boolean handlePost(PublicationDetailsForm form, BindException errors) { if (form.hasPubmedId()) { - Pair linkAndCitation = NcbiUtils.getLinkAndCitation(form.getPubmedId()); + Pair linkAndCitation = NcbiUtils.getPubMedLinkAndCitation(form.getPubmedId()); if (linkAndCitation != 
null) { form.setLink(linkAndCitation.first); @@ -10721,7 +10722,7 @@ public ModelAndView getView(ExperimentIdForm form, BindException errors) List matches; try { - matches = NcbiPublicationSearchService.searchForPublication(_exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null); + matches = NcbiPublicationSearchService.searchForPublication(_exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, true); } catch (Exception e) { @@ -10817,7 +10818,7 @@ public Object execute(ExperimentIdForm form, BindException errors) try { - List matches = NcbiPublicationSearchService.searchForPublication(expAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null); + List matches = NcbiPublicationSearchService.searchForPublication(expAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, false); response.put("success", true); response.put("papersFound", matches.size()); @@ -10906,7 +10907,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws return false; } - PublicationMatch.PublicationType pubType = PublicationMatch.PublicationType.fromString(form.getPublicationType()); + DB pubType = DB.fromString(form.getPublicationType()); if (pubType == null) { errors.reject(ERROR_MSG, "Invalid publication type: " + form.getPublicationType()); @@ -10915,6 +10916,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws // Reconstruct PublicationMatch from form fields PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, form.getMatchInfo()); + selectedMatch.setCitation(NcbiUtils.getCitation(form.getPublicationId(), pubType)); // Post notification JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(exptAnnotations); diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index eeb6688c..146575c5 100644 --- 
a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -438,7 +438,10 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations message.append("Great news! We found a publication that appears to be associated with your data on Panorama Public (") .append(shortUrl).append("), which has been private since ").append(dateString).append(".") .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) - .append(NL2).append(bold("Publication Found:")).append(" ").append(link(articleMatch.getPublicationLabel(), articleMatch.getPublicationUrl())) + .append(NL2).append(bold("Publication Found:")).append(" ") + .append(articleMatch.getCitation() != null + ? link(articleMatch.getCitation(), articleMatch.getPublicationUrl()) + : link(articleMatch.getPublicationLabel(), articleMatch.getPublicationUrl())) .append(NL2).append("Since your work has been published, we encourage you to make your data public so the research community can access it alongside your publication. 
") .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") .append(bold(link("Make Data Public", makePublicLink))).append(".") diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 5e6c6a3a..3f94b86a 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -2,7 +2,7 @@ import org.jetbrains.annotations.Nullable; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; -import org.labkey.panoramapublic.ncbi.PublicationMatch; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils; import java.util.Date; @@ -100,7 +100,7 @@ public void setPublicationType(String publicationType) public String getPublicationLabel() { - PublicationMatch.PublicationType type = PublicationMatch.PublicationType.fromString(_publicationType); + NcbiUtils.DB type = NcbiUtils.DB.fromString(_publicationType); if (type == null) { return _publicationType != null ? 
_publicationType : ""; diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 71e5669a..2e02e3d8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -20,7 +20,8 @@ import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.model.ExperimentAnnotations; -import org.labkey.panoramapublic.ncbi.PublicationMatch.PublicationType; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils.DB; import java.io.IOException; import java.net.URLEncoder; @@ -90,7 +91,7 @@ private static Logger getLog(@Nullable Logger logger) */ public static PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) { - List matches = searchForPublication(expAnnotations, 1, logger); + List matches = searchForPublication(expAnnotations, 1, logger, true); return matches.isEmpty() ? 
null : matches.get(0); } @@ -100,7 +101,7 @@ public static PublicationMatch searchForPublication(@NotNull ExperimentAnnotatio * @param maxResults maximum number of article matches to return (capped at 5) * @param logger optional logger; when null, uses the class logger */ - public static List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger) + public static List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations) { Logger log = getLog(logger); maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS)); @@ -127,6 +128,16 @@ public static List searchForPublication(@NotNull ExperimentAnn { matchedArticles = matchedArticles.subList(0, maxResults); } + + if (getCitations) + { + // Fetch citations for each match + for (PublicationMatch match : matchedArticles) + { + match.setCitation(NcbiUtils.getCitation(match.getPublicationId(), match.getPublicationType())); + } + } + log.info("Returning {}", StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); return matchedArticles; } @@ -387,7 +398,7 @@ private static List fetchAndVerifyPmcArticles( // Extract PubMed ID, is found String pubMedId = extractPubMedId(articleData); String publicationId = pubMedId == null ? pmcId : pubMedId; - PublicationType publicationType = pubMedId == null ? PublicationType.PMC : PublicationType.PMID; + DB publicationType = pubMedId == null ? 
DB.PMC : DB.PubMed; // Get strategies that found this article List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); @@ -537,7 +548,7 @@ private static List searchPubMed(ExperimentAnnotations expAnno Date pubDate = parsePublicationDate(articleData, log); PublicationMatch article = new PublicationMatch( pmid, - PublicationType.PMID, + DB.PubMed, false, // Not found by PX ID false, // Not found by Panorama URL false, // Not found by DOI diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index 989fba1c..676af92e 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -5,6 +5,9 @@ import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.panoramapublic.model.DatasetStatus; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils.DB; + import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -21,44 +24,17 @@ public class PublicationMatch public static final String MATCH_TITLE = "Title"; private final String _publicationId; - private final PublicationType _publicationType; + private final DB _publicationType; private final boolean _matchesProteomeXchangeId; private final boolean _matchesPanoramaUrl; private final boolean _matchesDoi; private final boolean _matchesAuthor; private final boolean _matchesTitle; private final @Nullable Date _publicationDate; + private @Nullable String _citation; - public enum PublicationType - { - PMID("PubMed ID"), - PMC("PMC ID"); - - private final String _label; - - PublicationType(String label) - { - _label = label; - } - - /** Human-readable label, e.g. "PubMed ID" or "PMC ID". 
*/ - public String getLabel() - { - return _label; - } - - public static @Nullable PublicationType fromString(@Nullable String value) - { - if (value == null) return null; - for (PublicationType type : values()) - { - if (type.name().equals(value)) return type; - } - return null; - } - } - public PublicationMatch(String publicationId, PublicationType publicationType, + public PublicationMatch(String publicationId, DB publicationType, boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, boolean matchesAuthor, boolean matchesTitle, @Nullable Date publicationDate) @@ -78,7 +54,7 @@ public String getPublicationId() return _publicationId; } - public PublicationType getPublicationType() + public DB getPublicationType() { return _publicationType; } @@ -113,12 +89,20 @@ public boolean matchesTitle() return _publicationDate; } + public @Nullable String getCitation() + { + return _citation; + } + + public void setCitation(@Nullable String citation) + { + _citation = citation; + } + /** * String representation of what matched for this article. * This is what gets stored in the PublicationMatchInfo database column. * Example: "ProteomeXchange ID, Panorama URL, Author, Title" - * - *

Use {@link #fromMatchInfo(String)} to reconstruct the individual flags. */ public String getMatchInfo() { @@ -133,11 +117,11 @@ public String getMatchInfo() public String getPublicationUrl() { - if (_publicationType == PublicationType.PMC) + if (_publicationType == DB.PMC) { - return "https://www.ncbi.nlm.nih.gov/pmc/articles/" + _publicationId; + return NcbiUtils.getPmcLink(_publicationId); } - return "https://pubmed.ncbi.nlm.nih.gov/" + _publicationId; + return NcbiUtils.getPubmedLink(_publicationId); } public String getPublicationLabel() @@ -153,6 +137,10 @@ public JSONObject toJson() json.put("publicationLabel", getPublicationLabel()); json.put("publicationUrl", getPublicationUrl()); json.put("matchInfo", getMatchInfo()); + if (_citation != null) + { + json.put("citation", _citation); + } return json; } @@ -160,7 +148,7 @@ public JSONObject toJson() * Build a PublicationMatch from a publication ID, type, and match info string. * @see #getMatchInfo() */ - public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull PublicationType publicationType, @Nullable String matchInfo) + public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull DB publicationType, @Nullable String matchInfo) { boolean pxId = false, url = false, doi = false, author = false, title = false; if (!StringUtils.isBlank(matchInfo)) @@ -190,7 +178,7 @@ public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @Not { return null; } - PublicationType type = PublicationType.fromString(datasetStatus.getPublicationType()); + DB type = DB.fromString(datasetStatus.getPublicationType()); if (type == null) { return null; diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index 86fb247f..93016dfb 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -27,6 +27,7 @@ import org.labkey.panoramapublic.model.JournalSubmission; import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.ncbi.PublicationMatch; +import org.labkey.panoramapublic.proteomexchange.NcbiUtils; import org.labkey.panoramapublic.query.DatasetStatusManager; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; @@ -184,10 +185,10 @@ public static ReminderDecision skip(String reason) { * @param log Logger for diagnostic messages * @return NcbiArticleMatch with search result */ - private static PublicationMatch checkForPublication(@NotNull ExperimentAnnotations expAnnotations, - @NotNull PrivateDataReminderSettings settings, - boolean forceCheck, - @NotNull Logger log) + private static PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, + @NotNull PrivateDataReminderSettings settings, + boolean forceCheck, + @NotNull Logger log) { // Check if publication checking is enabled (either globally or forced for this run) if (!forceCheck && !settings.isEnablePublicationSearch()) @@ -332,7 +333,7 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex } // Check for publications if enabled - PublicationMatch publicationResult = checkForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, processingResults._log); + PublicationMatch publicationResult = searchForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, processingResults._log); if (!context.isTestMode()) { diff --git a/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java b/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java index c599dd9f..4bb0c49c 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java @@ -23,6 +23,7 @@ import org.labkey.api.collections.IntHashMap; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.util.Pair; +import org.jetbrains.annotations.Nullable; import org.labkey.api.util.logging.LogHelper; import org.labkey.api.view.AjaxCompletion; import org.w3c.dom.CharacterData; @@ -43,7 +44,6 @@ import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; @@ -55,9 +55,38 @@ public class NcbiUtils private static final String eutilsUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy"; private static final String autoCompUrl = "https://blast.ncbi.nlm.nih.gov/portal/utils/autocomp.fcgi?dict=taxids_sg&q="; - private static final String citationEporterUrl = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; + private static final String pubmedCitationExporterUrl = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pubmed/?format=citation&id="; + private static final String pmcCitationExporterUrl = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pmc/?format=citation&id="; public static final String PUBMED_ID = "^[0-9]{1,8}$"; // https://libguides.library.arizona.edu/c.php?g=406096&p=2779570 + public enum DB + { + PubMed("PubMed"), + PMC("PubMed Central"); + + private final String _label; + + DB(String label) + { + _label = label; + } + + public String getLabel() + { + return _label; + } + + public static @Nullable DB fromString(@Nullable String database) + { + if (database == null) return null; + for (DB db : values()) + { + if (db.name().equals(database)) return db; + } + return null; + } + } + private static final Logger LOG = LogHelper.getLogger(NcbiUtils.class, "Messages about using the NCBI utilities"); public static List getCompletions(String token) throws PxException @@ -183,19 +212,21 @@ public 
static Map getScientificNames(List taxIds) thro } /** - * Does a citation lookup for the given PubMedId using the NCBI's Literature Citation Exporter (https://api.ncbi.nlm.nih.gov/lit/ctxp). - * Example URL: https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id=28691345 - * @param pubmedId PubMed Id - * @return the PubMed link and the NLM-style citation if the lookup is successful + * Fetches the NLM-style citation for a publication using the NCBI Literature Citation Exporter. + * Supports both PubMed and PMC publication types. + * @param publicationId numeric publication ID (e.g. "28691345") + * @param database PubMed or PMC + * @return the NLM citation string, or null if the lookup fails or the ID is invalid */ - public static Pair getLinkAndCitation (String pubmedId) + public static @Nullable String getCitation(String publicationId, DB database) { - if (pubmedId == null || !pubmedId.matches(PUBMED_ID)) + if (publicationId == null || !publicationId.matches(PUBMED_ID)) { return null; } - String queryUrl = citationEporterUrl + pubmedId; + String baseUrl = database == DB.PMC ? pmcCitationExporterUrl : pubmedCitationExporterUrl; + String queryUrl = baseUrl + publicationId; HttpURLConnection conn = null; try @@ -213,9 +244,7 @@ public static Pair getLinkAndCitation (String pubmedId) { response = IOUtils.toString(in, StandardCharsets.UTF_8); } - String citation = getCitation(response, pubmedId); - - return citation != null ? new Pair<>(getPubmedLink(pubmedId), citation) : null; + return parseCitation(response, publicationId, database); } } catch (IOException e) @@ -229,13 +258,30 @@ public static Pair getLinkAndCitation (String pubmedId) return null; } - private static String getPubmedLink(String pubmedId) + /** + * Does a citation lookup for the given PubMedId using the NCBI's Literature Citation Exporter (https://pmc.ncbi.nlm.nih.gov/api/ctxp). 
+ * Example URL: https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id=28691345 + * @param pubmedId PubMed Id + * @return the PubMed link and the NLM-style citation if the lookup is successful + */ + public static Pair getPubMedLinkAndCitation(String pubmedId) + { + String citation = getCitation(pubmedId, DB.PubMed); + return citation != null ? new Pair<>(getPubmedLink(pubmedId), citation) : null; + } + + public static String getPubmedLink(String pubmedId) { // Example: https://pubmed.ncbi.nlm.nih.gov/29331002 return "https://pubmed.ncbi.nlm.nih.gov/" + pubmedId; } - private static String getCitation(String response, String pubmedId) + public static String getPmcLink(String pubmedId) + { + return "https://www.ncbi.nlm.nih.gov/pmc/articles/" + pubmedId; + } + + private static String parseCitation(String response, String publicationId, DB database) { try { @@ -251,7 +297,7 @@ private static String getCitation(String response, String pubmedId) } catch (JSONException e) { - LOG.error("Error parsing response from NCBI Literature Citation Exporter for pubmedID " + pubmedId, e); + LOG.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); } return null; } diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp index 749cbc9d..eea30be6 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp @@ -3,7 +3,6 @@ <%@ page import="org.labkey.api.view.JspView" %> <%@ page import="org.labkey.api.view.ActionURL" %> <%@ page import="org.labkey.api.util.DateUtil" %> -<%@ page import="org.labkey.api.util.PageFlowUtil" %> <%@ page import="org.labkey.panoramapublic.PanoramaPublicController" %> <%@ page 
import="org.labkey.panoramapublic.PanoramaPublicController.SearchPublicationsForDatasetBean" %> <%@ page import="org.labkey.panoramapublic.ncbi.PublicationMatch" %> @@ -81,7 +80,7 @@ - + <% if (bean.isShowDismissedColumn()) { %> @@ -100,7 +99,9 @@ data-publicationtype="<%=h(match.getPublicationType().name())%>" data-matchinfo="<%=h(match.getMatchInfo())%>" /> - + <% if (bean.isShowDismissedColumn()) { %> From b96aa3487e6eb90201ea8b977b0918d3a49a7ef3 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Fri, 27 Feb 2026 17:22:58 -0800 Subject: [PATCH 15/27] Moved methods to retrieve citations from NcbiUtils to NcbiPublicationSearchService. --- .../PanoramaPublicController.java | 10 +- .../panoramapublic/model/DatasetStatus.java | 4 +- .../panoramapublic/ncbi/NcbiConstants.java | 48 +++++++ .../ncbi/NcbiPublicationSearchService.java | 87 +++++++++++- .../panoramapublic/ncbi/PublicationMatch.java | 7 +- .../pipeline/PrivateDataReminderJob.java | 1 - .../proteomexchange/NcbiUtils.java | 134 +----------------- 7 files changed, 148 insertions(+), 143 deletions(-) create mode 100644 panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 941b952d..aaec59e8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -181,8 +181,10 @@ import org.labkey.panoramapublic.proteomexchange.ChemElement; import org.labkey.panoramapublic.proteomexchange.ExperimentModificationGetter; import org.labkey.panoramapublic.proteomexchange.Formula; +import org.labkey.panoramapublic.ncbi.NcbiConstants; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.proteomexchange.NcbiUtils; -import 
org.labkey.panoramapublic.proteomexchange.NcbiUtils.DB; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeServiceException; import org.labkey.panoramapublic.proteomexchange.PsiInstrumentParser; @@ -265,7 +267,7 @@ import static org.labkey.api.util.DOM.Attribute.width; import static org.labkey.api.util.DOM.LK.ERRORS; import static org.labkey.api.util.DOM.LK.FORM; -import static org.labkey.panoramapublic.proteomexchange.NcbiUtils.PUBMED_ID; +import static org.labkey.panoramapublic.ncbi.NcbiConstants.PUBMED_ID; /** * User: vsharma @@ -7048,7 +7050,7 @@ public boolean handlePost(PublicationDetailsForm form, BindException errors) { if (form.hasPubmedId()) { - Pair linkAndCitation = NcbiUtils.getPubMedLinkAndCitation(form.getPubmedId()); + Pair linkAndCitation = NcbiPublicationSearchService.getPubMedLinkAndCitation(form.getPubmedId()); if (linkAndCitation != null) { form.setLink(linkAndCitation.first); @@ -10916,7 +10918,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws // Reconstruct PublicationMatch from form fields PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, form.getMatchInfo()); - selectedMatch.setCitation(NcbiUtils.getCitation(form.getPublicationId(), pubType)); + selectedMatch.setCitation(NcbiPublicationSearchService.getCitation(form.getPublicationId(), pubType)); // Post notification JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(exptAnnotations); diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 3f94b86a..9d042d22 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -2,7 +2,7 @@ import org.jetbrains.annotations.Nullable; import 
org.labkey.panoramapublic.message.PrivateDataReminderSettings; -import org.labkey.panoramapublic.proteomexchange.NcbiUtils; +import org.labkey.panoramapublic.ncbi.NcbiConstants; import java.util.Date; @@ -100,7 +100,7 @@ public void setPublicationType(String publicationType) public String getPublicationLabel() { - NcbiUtils.DB type = NcbiUtils.DB.fromString(_publicationType); + NcbiConstants.DB type = NcbiConstants.DB.fromString(_publicationType); if (type == null) { return _publicationType != null ? _publicationType : ""; diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java new file mode 100644 index 00000000..76f22c1c --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java @@ -0,0 +1,48 @@ +package org.labkey.panoramapublic.ncbi; + +import org.jetbrains.annotations.Nullable; + +public class NcbiConstants +{ + /** Regex for validating numeric PubMed/PMC IDs (1-8 digits). 
*/ + public static final String PUBMED_ID = "^[0-9]{1,8}$"; // https://libguides.library.arizona.edu/c.php?g=406096&p=2779570 + + public enum DB + { + PubMed("PubMed"), + PMC("PubMed Central"); + + private final String _label; + + DB(String label) + { + _label = label; + } + + public String getLabel() + { + return _label; + } + + public static @Nullable DB fromString(@Nullable String database) + { + if (database == null) return null; + for (DB db : values()) + { + if (db.name().equals(database)) return db; + } + return null; + } + } + + public static String getPubmedLink(String pubmedId) + { + // Example: https://pubmed.ncbi.nlm.nih.gov/29331002 + return "https://pubmed.ncbi.nlm.nih.gov/" + pubmedId; + } + + public static String getPmcLink(String pmcId) + { + return "https://www.ncbi.nlm.nih.gov/pmc/articles/" + pmcId; + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 2e02e3d8..7fed2b7f 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -1,5 +1,6 @@ package org.labkey.panoramapublic.ncbi; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hc.client5.http.classic.methods.HttpGet; import org.apache.hc.client5.http.config.ConnectionConfig; @@ -16,14 +17,17 @@ import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; +import org.labkey.api.util.Pair; import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.model.ExperimentAnnotations; -import org.labkey.panoramapublic.proteomexchange.NcbiUtils; -import org.labkey.panoramapublic.proteomexchange.NcbiUtils.DB; +import 
org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.text.ParseException; @@ -55,6 +59,10 @@ public class NcbiPublicationSearchService private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; + // NCBI Literature Citation Exporter endpoints + private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pubmed/?format=citation&id="; + private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pmc/?format=citation&id="; + // API parameters private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec public static final int MAX_RESULTS = 5; @@ -84,6 +92,79 @@ private static Logger getLog(@Nullable Logger logger) return logger != null ? logger : LOG; } + /** + * Fetches the NLM-style citation for a publication using the NCBI Literature Citation Exporter. + * Supports both PubMed and PMC publication types. + * @param publicationId numeric publication ID (e.g. "28691345") + * @param database PubMed or PMC + * @return the NLM citation string, or null if the lookup fails or the ID is invalid + */ + public static @Nullable String getCitation(String publicationId, DB database) + { + if (publicationId == null || !publicationId.matches(NcbiConstants.PUBMED_ID)) + { + return null; + } + + String baseUrl = database == DB.PMC ? 
PMC_CITATION_EXPORTER_URL : PUBMED_CITATION_EXPORTER_URL; + String queryUrl = baseUrl + publicationId; + + HttpURLConnection conn = null; + try + { + URL url = new URL(queryUrl); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + + int status = conn.getResponseCode(); + + if (status == HttpURLConnection.HTTP_OK) + { + String response; + try (InputStream in = conn.getInputStream()) + { + response = IOUtils.toString(in, StandardCharsets.UTF_8); + } + return parseCitation(response, publicationId, database); + } + } + catch (IOException e) + { + LOG.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); + } + finally + { + if (conn != null) conn.disconnect(); + } + return null; + } + + /** + * Does a citation lookup for the given PubMedId using the NCBI's Literature Citation Exporter. + * @param pubmedId PubMed Id + * @return the PubMed link and the NLM-style citation if the lookup is successful + */ + public static Pair getPubMedLinkAndCitation(String pubmedId) + { + String citation = getCitation(pubmedId, DB.PubMed); + return citation != null ? new Pair<>(NcbiConstants.getPubmedLink(pubmedId), citation) : null; + } + + private static String parseCitation(String response, String publicationId, DB database) + { + try + { + var jsonObject = new JSONObject(response); + var nlmInfo = jsonObject.optJSONObject("nlm"); + return null != nlmInfo ? nlmInfo.getString("orig") : null; + } + catch (JSONException e) + { + LOG.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); + } + return null; + } + /** * Search for publications associated with the given experiment. 
* Returns the top result if multiple results are found @@ -134,7 +215,7 @@ public static List searchForPublication(@NotNull ExperimentAnn // Fetch citations for each match for (PublicationMatch match : matchedArticles) { - match.setCitation(NcbiUtils.getCitation(match.getPublicationId(), match.getPublicationType())); + match.setCitation(getCitation(match.getPublicationId(), match.getPublicationType())); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index 676af92e..6591b27d 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -5,8 +5,7 @@ import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.panoramapublic.model.DatasetStatus; -import org.labkey.panoramapublic.proteomexchange.NcbiUtils; -import org.labkey.panoramapublic.proteomexchange.NcbiUtils.DB; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import java.util.ArrayList; import java.util.Date; @@ -119,9 +118,9 @@ public String getPublicationUrl() { if (_publicationType == DB.PMC) { - return NcbiUtils.getPmcLink(_publicationId); + return NcbiConstants.getPmcLink(_publicationId); } - return NcbiUtils.getPubmedLink(_publicationId); + return NcbiConstants.getPubmedLink(_publicationId); } public String getPublicationLabel() diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index 93016dfb..d3bd267b 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -27,7 +27,6 @@ import org.labkey.panoramapublic.model.JournalSubmission; import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; 
import org.labkey.panoramapublic.ncbi.PublicationMatch; -import org.labkey.panoramapublic.proteomexchange.NcbiUtils; import org.labkey.panoramapublic.query.DatasetStatusManager; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; diff --git a/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java b/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java index 4bb0c49c..bb8355e8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java +++ b/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java @@ -15,15 +15,10 @@ */ package org.labkey.panoramapublic.proteomexchange; -import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.Logger; -import org.json.JSONException; import org.json.JSONObject; import org.labkey.api.collections.IntHashMap; import org.labkey.api.util.PageFlowUtil; -import org.labkey.api.util.Pair; -import org.jetbrains.annotations.Nullable; import org.labkey.api.util.logging.LogHelper; import org.labkey.api.view.AjaxCompletion; import org.w3c.dom.CharacterData; @@ -38,7 +33,6 @@ import javax.xml.parsers.ParserConfigurationException; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; @@ -49,44 +43,17 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.logging.log4j.Logger; + +/** + * Utilities for NCBI taxonomy lookups (autocomplete and scientific name resolution). 
+ */ public class NcbiUtils { private static final Pattern pattern = Pattern.compile("new Array\\(\"(.*)\"\\),"); private static final String eutilsUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy"; private static final String autoCompUrl = "https://blast.ncbi.nlm.nih.gov/portal/utils/autocomp.fcgi?dict=taxids_sg&q="; - private static final String pubmedCitationExporterUrl = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pubmed/?format=citation&id="; - private static final String pmcCitationExporterUrl = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pmc/?format=citation&id="; - public static final String PUBMED_ID = "^[0-9]{1,8}$"; // https://libguides.library.arizona.edu/c.php?g=406096&p=2779570 - - public enum DB - { - PubMed("PubMed"), - PMC("PubMed Central"); - - private final String _label; - - DB(String label) - { - _label = label; - } - - public String getLabel() - { - return _label; - } - - public static @Nullable DB fromString(@Nullable String database) - { - if (database == null) return null; - for (DB db : values()) - { - if (db.name().equals(database)) return db; - } - return null; - } - } - private static final Logger LOG = LogHelper.getLogger(NcbiUtils.class, "Messages about using the NCBI utilities"); public static List getCompletions(String token) throws PxException @@ -210,95 +177,4 @@ public static Map getScientificNames(List taxIds) thro } return sciNameMap; } - - /** - * Fetches the NLM-style citation for a publication using the NCBI Literature Citation Exporter. - * Supports both PubMed and PMC publication types. - * @param publicationId numeric publication ID (e.g. "28691345") - * @param database PubMed or PMC - * @return the NLM citation string, or null if the lookup fails or the ID is invalid - */ - public static @Nullable String getCitation(String publicationId, DB database) - { - if (publicationId == null || !publicationId.matches(PUBMED_ID)) - { - return null; - } - - String baseUrl = database == DB.PMC ? 
pmcCitationExporterUrl : pubmedCitationExporterUrl; - String queryUrl = baseUrl + publicationId; - - HttpURLConnection conn = null; - try - { - URL url = new URL(queryUrl); - conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - - int status = conn.getResponseCode(); - - if (status == HttpURLConnection.HTTP_OK) - { - String response; - try (InputStream in = conn.getInputStream()) - { - response = IOUtils.toString(in, StandardCharsets.UTF_8); - } - return parseCitation(response, publicationId, database); - } - } - catch (IOException e) - { - LOG.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); - } - finally - { - if(conn != null) conn.disconnect(); - } - return null; - } - - /** - * Does a citation lookup for the given PubMedId using the NCBI's Literature Citation Exporter (https://pmc.ncbi.nlm.nih.gov/api/ctxp). - * Example URL: https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id=28691345 - * @param pubmedId PubMed Id - * @return the PubMed link and the NLM-style citation if the lookup is successful - */ - public static Pair getPubMedLinkAndCitation(String pubmedId) - { - String citation = getCitation(pubmedId, DB.PubMed); - return citation != null ? new Pair<>(getPubmedLink(pubmedId), citation) : null; - } - - public static String getPubmedLink(String pubmedId) - { - // Example: https://pubmed.ncbi.nlm.nih.gov/29331002 - return "https://pubmed.ncbi.nlm.nih.gov/" + pubmedId; - } - - public static String getPmcLink(String pubmedId) - { - return "https://www.ncbi.nlm.nih.gov/pmc/articles/" + pubmedId; - } - - private static String parseCitation(String response, String publicationId, DB database) - { - try - { - var jsonObject = new JSONObject(response); - /* We are interested in the NLM style citation. - nlm: { - orig: "ENCODE Project Consortium. An integrated encyclopedia of DNA elements in the human genome. Nature. 2012 Sep 6;489(7414):57-74. doi: 10.1038/nature11247. 
PMID: 22955616; PMCID: PMC3439153.", - format: "ENCODE Project Consortium. An integrated encyclopedia of DNA elements in the human genome. Nature. 2012 Sep 6;489(7414):57-74. doi: 10.1038/nature11247. PMID: 22955616; PMCID: PMC3439153." - } - */ - var nlmInfo = jsonObject.optJSONObject("nlm"); - return null != nlmInfo ? nlmInfo.getString("orig") : null; - } - catch (JSONException e) - { - LOG.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); - } - return null; - } } From 7eeb821a1502c4dd0affc81bd3ec5c3fbe080add Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Mon, 2 Mar 2026 14:59:18 -0800 Subject: [PATCH 16/27] Added unit tests. --- .../panoramapublic/PanoramaPublicModule.java | 2 + .../ncbi/NcbiPublicationSearchService.java | 486 ++++++++++++++++-- 2 files changed, 454 insertions(+), 34 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java index 51e41c3c..51bb621b 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java @@ -47,6 +47,7 @@ import org.labkey.panoramapublic.bluesky.PanoramaPublicLogoResourceType; import org.labkey.panoramapublic.catalog.CatalogImageAttachmentType; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.speclib.SpecLibKey; import org.labkey.panoramapublic.pipeline.CopyExperimentPipelineProvider; @@ -382,6 +383,7 @@ public Set getSchemaNames() set.add(CatalogEntryManager.TestCase.class); set.add(BlueskyApiClient.TestCase.class); set.add(PrivateDataReminderSettings.TestCase.class); + set.add(NcbiPublicationSearchService.TestCase.class); return set; } diff --git 
a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 7fed2b7f..6fc747a8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -17,6 +17,8 @@ import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; +import org.junit.Assert; +import org.junit.Test; import org.labkey.api.util.Pair; import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; @@ -60,8 +62,8 @@ public class NcbiPublicationSearchService private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; // NCBI Literature Citation Exporter endpoints - private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pubmed/?format=citation&id="; - private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/api/ctxp/v1/pmc/?format=citation&id="; + private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; + private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=citation&id="; // API parameters private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec @@ -76,15 +78,27 @@ public class NcbiPublicationSearchService }; // Title keyword extraction - private static final int MIN_KEYWORD_LENGTH = 5; - private static final int MAX_KEYWORDS = 5; + private static final int MIN_KEYWORD_LENGTH = 3; + private static final double KEYWORD_MATCH_THRESHOLD = 0.6; // 60% of keywords must match - // Stop words for title matching + // Stop words for title keyword matching. These words are not meaningful discriminators between papers. 
+ // Function words: articles, prepositions, conjunctions, auxiliary verbs, pronouns. + // Title fillers: common words in paper titles that are not discriminating. private static final Set TITLE_STOP_WORDS = Set.of( - "analysis", "study", "using", "based", "data", "dataset", "proteomics", - "method", "methods", "approach", "application", "investigation", - "examination", "characterization", "identification", "quantification", - "comparison", "evaluation" + + // Function words + "the", "this", "that", "these", "those", "its", // articles / determiners + "for", "with", "from", "into", "upon", "via", "after", "about", // prepositions + "between", "through", "across", "under", "over", "during", + "and", "but", "than", // conjunctions + "are", "was", "were", "been", "have", "has", "can", "may", // auxiliary / modal verbs + "our", "their", // pronouns + "not", "all", "also", "both", "each", "how", "use", "used", // other function words + "here", "well", "two", "one", "more", "most", "only", "such", "other", "which", + + // Title fillers + "using", "based", "reveals", "revealed", "show", "shows", "shown", + "role", "study", "studies" ); private static Logger getLog(@Nullable Logger logger) @@ -714,14 +728,13 @@ private static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) return false; } - // Try sorttitle first, then title - String articleTitle = normalizeTitle(metadata.optString("sorttitle")); - if (articleTitle.isEmpty()) - { - articleTitle = normalizeTitle(metadata.optString("title")); - } + // Try title first, then sorttitle as fallback + String title = normalizeTitle(metadata.optString("title")); + String normalizedArticleTitle = title.isEmpty() + ? 
normalizeTitle(metadata.optString("sorttitle")) + : title; - if (articleTitle.isEmpty()) + if (normalizedArticleTitle.isEmpty()) { return false; } @@ -729,28 +742,36 @@ private static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) String normalizedDatasetTitle = normalizeTitle(datasetTitle); // Try exact match first - if (articleTitle.equals(normalizedDatasetTitle)) + if (normalizedArticleTitle.equals(normalizedDatasetTitle)) { return true; } - // Fall back to keyword matching - List keywords = extractTitleKeywords(datasetTitle); + // Bi-directional keyword matching: match if either direction meets the threshold. + // This handles cases where one title is much more specific than the other. + return keywordsMatch(datasetTitle, normalizedArticleTitle) + || keywordsMatch(metadata.optString("title", metadata.optString("sorttitle", "")), normalizedDatasetTitle); + } + + /** + * Check if keywords extracted from {@code sourceTitle} are present in {@code normalizedTarget}. + * Returns true if at least 60% of keywords match (with a minimum of 2). + */ + private static boolean keywordsMatch(String sourceTitle, String normalizedTarget) + { + List keywords = extractTitleKeywords(sourceTitle); if (keywords.isEmpty()) { return false; } - // All keywords must be present in article title - for (String keyword : keywords) - { - if (!articleTitle.contains(keyword.toLowerCase())) - { - return false; - } - } + long matchCount = keywords.stream() + .filter(keyword -> normalizedTarget.contains(keyword)) + .count(); - return true; + int required = (int) Math.ceil(keywords.size() * KEYWORD_MATCH_THRESHOLD); + required = Math.max(required, Math.min(2, keywords.size())); + return matchCount >= required; } /** @@ -767,7 +788,8 @@ private static String normalizeTitle(String title) } /** - * Extract meaningful keywords from title for matching + * Extract meaningful keywords from title for matching. 
+ * Returns all qualifying words (no cap) — the caller uses a percentage threshold. */ private static List extractTitleKeywords(String title) { @@ -776,7 +798,6 @@ private static List extractTitleKeywords(String title) return Collections.emptyList(); } - // Extract words of minimum length String[] words = title.toLowerCase().split("\\s+"); List keywords = new ArrayList<>(); @@ -792,10 +813,7 @@ private static List extractTitleKeywords(String title) } } - // Return top N keywords - return keywords.stream() - .limit(MAX_KEYWORDS) - .collect(Collectors.toList()); + return keywords; } /** @@ -874,4 +892,404 @@ private static String quote(String str) { return "\"" + str + "\""; } + + public static class TestCase extends Assert + { + // -- parseCitation tests -- + + @Test + public void testParseCitation() + { + // Valid NLM citation response + String json = "{\"nlm\":{\"orig\":\"Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. PMID: 23689285; PMCID: PMC3769335.\"}}"; + assertEquals("Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. 
PMID: 23689285; PMCID: PMC3769335.", parseCitation(json, "23689285", DB.PubMed)); + + // Missing nlm key + assertEquals(null, parseCitation("{\"ama\":{\"orig\":\"something\"}}", "23689285", DB.PubMed)); + + // Malformed JSON + assertEquals(null, parseCitation("not json", "23689285", DB.PubMed)); + + // Empty nlm object (missing "orig" key) + assertEquals(null, parseCitation("{\"nlm\":{}}", "23689285", DB.PubMed)); + } + + // -- isPreprint tests -- + + @Test + public void testIsPreprint() + { + // Non-preprint + assertFalse(isPreprint(articleMetadata("J Proteome Res", "Journal of Proteome Research"))); + + // Preprint by source + assertTrue(isPreprint(articleMetadata("bioRxiv", "bioRxiv"))); + assertTrue(isPreprint(articleMetadata("medRxiv", ""))); + + // Preprint by journal name + assertTrue(isPreprint(articleMetadata("", "Research Square"))); + assertTrue(isPreprint(articleMetadata("", "ChemRxiv preprint"))); + + // Case insensitive + assertTrue(isPreprint(articleMetadata("BIORXIV", ""))); + + // Empty fields + assertFalse(isPreprint(articleMetadata("", ""))); + } + + // -- checkAuthorMatch tests -- + + @Test + public void testCheckAuthorMatch() + { + // Standard match: "Sharma V" matches firstName=Vagisha, lastName=Sharma + JSONObject metadata = metadataWithAuthors("Sharma V", "Jones AB", "Smith CD"); + assertTrue(checkAuthorMatch(metadata, "Vagisha", "Sharma")); + + // Match is case-insensitive + assertTrue(checkAuthorMatch(metadataWithAuthors("sharma v"), "Vagisha", "Sharma")); + assertTrue(checkAuthorMatch(metadataWithAuthors("SHARMA V"), "vagisha", "sharma")); + + // No matching author + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones AB", "Smith CD"), "Vagisha", "Sharma")); + + // Blank first or last name + assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "", "Sharma")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "Vagisha", "")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), null, "Sharma")); + + // 
Empty authors array + assertFalse(checkAuthorMatch(new JSONObject(), "Vagisha", "Sharma")); + + // Author with full first name: "Sharma Vagisha" + assertTrue(checkAuthorMatch(metadataWithAuthors("Sharma Vagisha"), "Vagisha", "Sharma")); + } + + // -- checkTitleMatch tests -- + + @Test + public void testCheckTitleMatchExact() + { + // Exact match (case-insensitive, punctuation-stripped) + JSONObject metadata = metadataWithTitle("Quantitative Proteomics of Muscle Fibers"); + assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers")); + assertTrue(checkTitleMatch(metadata, "quantitative proteomics of muscle fibers")); + assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers!")); + } + + @Test + public void testCheckTitleMatchKeywords() + { + // Article: "quantitative proteomics reveals muscle fibers composition" + // Dataset: "muscle fibers composition proteomics quantitative patterns" + // Dataset keywords: "muscle", "fibers", "composition", "proteomics", "quantitative", "patterns" (6 keywords) + // Matches in article: "muscle", "fibers", "composition", "proteomics", "quantitative" (5 of 6 = 83%) -> pass + JSONObject metadata = metadataWithTitle("A study of quantitative proteomics reveals muscle fibers composition"); + assertTrue(checkTitleMatch(metadata, "muscle fibers composition proteomics quantitative patterns")); + + // Keywords not present in article title — well below 60% + assertFalse(checkTitleMatch(metadata, "Novel cardiac lipids quantitation in mouse tissue")); + } + + @Test + public void testCheckTitleMatchThreshold() + { + // Article contains: "phosphoproteomics", "analysis", "lung", "cancer", "cell", "lines" + JSONObject metadata = metadataWithTitle("Phosphoproteomics analysis of lung cancer cell lines"); + + // 4 of 5 keywords match (80%) -> pass (threshold is 60%) + assertTrue(checkTitleMatch(metadata, "phosphoproteomics lung cancer cell biomarkers")); + + // Only 2 of 5 keywords match (40%) -> fail + 
assertFalse(checkTitleMatch(metadata, "phosphoproteomics kidney heart liver cancer")); + } + + @Test + public void testCheckTitleMatchShortKeywords() + { + // Short but meaningful words (3-4 chars) should be extracted as keywords + JSONObject metadata = metadataWithTitle("DIA proteomics of lung cell iron metabolism in mice"); + assertTrue(checkTitleMatch(metadata, "DIA lung cell iron mice proteomics")); + } + + @Test + public void testCheckTitleMatchBidirectional() + { + // Forward direction fails: dataset is very specific, article is short. + // Dataset keywords: "comprehensive", "phosphoproteomics", "analysis", "novel", "biomarkers", "lung", "cancer", "cell", "lines" (9) + // Only "lung", "cancer" found in article (2 of 9 = 22%) -> forward fails + // + // Reverse direction passes: article keywords: "lung", "cancer", "proteomics" (3) + // All 3 found in dataset -> 100% -> reverse passes + JSONObject metadata = metadataWithTitle("Lung cancer proteomics"); + assertTrue(checkTitleMatch(metadata, + "Comprehensive phosphoproteomics analysis reveals novel biomarkers in lung cancer cell lines")); + + // Both directions fail — completely unrelated titles + assertFalse(checkTitleMatch(metadataWithTitle("Cardiac tissue lipidomics"), + "Hepatic transcriptomics in zebrafish embryos")); + } + + @Test + public void testCheckTitleMatchEdgeCases() + { + // Blank dataset title + assertFalse(checkTitleMatch(metadataWithTitle("Some article"), "")); + assertFalse(checkTitleMatch(metadataWithTitle("Some article"), null)); + + // Missing title in metadata + assertFalse(checkTitleMatch(new JSONObject(), "Some title")); + + // Dataset title with only stop words and short words below MIN_KEYWORD_LENGTH + // "a the and for with" -> all stop words or < 3 chars -> no keywords + assertFalse(checkTitleMatch(metadataWithTitle("Different title entirely"), "a the and for with")); + } + + @Test + public void testCheckTitleMatchSorttitleFallback() + { + // When "title" is missing, falls back to 
"sorttitle" + JSONObject metadata = new JSONObject(); + metadata.put("sorttitle", "phosphoproteomics of lung cancer"); + assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); + + // When "title" is present, it is used (not "sorttitle") + metadata = new JSONObject(); + metadata.put("title", "Phosphoproteomics of Lung Cancer"); + metadata.put("sorttitle", "completely different sort title"); + assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); + } + + // -- extractTitleKeywords tests -- + + @Test + public void testExtractTitleKeywords() + { + // Normal title — all non-stop words >= 3 chars are extracted + List keywords = extractTitleKeywords("Novel phosphoproteomics workflow for cardiac tissue samples"); + assertTrue(keywords.contains("novel")); + assertTrue(keywords.contains("phosphoproteomics")); + assertTrue(keywords.contains("workflow")); + assertTrue(keywords.contains("cardiac")); + assertTrue(keywords.contains("tissue")); + assertTrue(keywords.contains("samples")); + assertFalse("'for' is too short", keywords.contains("for")); + + // Domain-specific words are NOT stop words — they should be kept + keywords = extractTitleKeywords("Quantitative proteomics characterization identification analysis"); + assertTrue("'proteomics' is domain-specific, not a stop word", keywords.contains("proteomics")); + assertTrue("'characterization' is domain-specific", keywords.contains("characterization")); + assertTrue("'identification' is domain-specific", keywords.contains("identification")); + assertTrue("'analysis' is domain-specific", keywords.contains("analysis")); + assertTrue(keywords.contains("quantitative")); + + // Function words and title fillers are excluded + keywords = extractTitleKeywords("the study using based reveals role"); + assertFalse("'the' is a function word", keywords.contains("the")); + assertFalse("'study' is a title filler", keywords.contains("study")); + assertFalse("'using' is a title filler", 
keywords.contains("using")); + assertFalse("'based' is a title filler", keywords.contains("based")); + assertFalse("'reveals' is a title filler", keywords.contains("reveals")); + assertFalse("'role' is a title filler", keywords.contains("role")); + + // Short meaningful words (>= 3 chars) are kept + keywords = extractTitleKeywords("DIA analysis of lung cell iron metabolism in mice"); + assertTrue("'dia' (3 chars) should be kept", keywords.contains("dia")); + assertTrue("'lung' (4 chars) should be kept", keywords.contains("lung")); + assertTrue("'cell' (4 chars) should be kept", keywords.contains("cell")); + assertTrue("'iron' (4 chars) should be kept", keywords.contains("iron")); + assertTrue("'mice' (4 chars) should be kept", keywords.contains("mice")); + assertFalse("'of' (2 chars) is too short", keywords.contains("of")); + assertFalse("'in' (2 chars) is too short", keywords.contains("in")); + + // No cap on number of keywords + keywords = extractTitleKeywords("alpha bravo charlie delta foxtrot hotel india juliet kilo lima"); + assertEquals(10, keywords.size()); + + // Blank input + assertTrue(extractTitleKeywords("").isEmpty()); + assertTrue(extractTitleKeywords(null).isEmpty()); + } + + // -- normalizeTitle tests -- + + @Test + public void testNormalizeTitle() + { + assertEquals("hello world", normalizeTitle("Hello, World!")); + assertEquals("testdriven development", normalizeTitle("Test-Driven Development")); + assertEquals("multiple spaces become one", normalizeTitle(" Multiple spaces become one ")); + assertEquals("", normalizeTitle(null)); + assertEquals("", normalizeTitle("")); + } + + // -- extractPubMedId tests -- + + @Test + public void testExtractPubMedId() + { + // PMC metadata with PMID + JSONObject metadata = new JSONObject(); + JSONArray articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); + articleIds.put(new JSONObject().put("idtype", "pmid").put("value", "28691345")); + 
metadata.put("articleids", articleIds); + assertEquals("28691345", extractPubMedId(metadata)); + + // No PMID in articleids + metadata = new JSONObject(); + articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); + metadata.put("articleids", articleIds); + assertNull(extractPubMedId(metadata)); + + // No articleids key + assertNull(extractPubMedId(new JSONObject())); + } + + // -- parsePublicationDate tests -- + + @Test + public void testParsePublicationDate() + { + // "YYYY Mon DD" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan 15"), LOG)); + + // "YYYY Mon" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan"), LOG)); + + // "YYYY" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024"), LOG)); + + // Falls back to epubdate when pubdate is blank + assertNotNull(parsePublicationDate(metadataWithDate("epubdate", "2024 Mar 01"), LOG)); + + // Unparseable date + assertNull(parsePublicationDate(metadataWithDate("pubdate", "not-a-date"), LOG)); + + // Empty date fields + assertNull(parsePublicationDate(new JSONObject(), LOG)); + } + + // -- applyPriorityFiltering tests -- + + @Test + public void testApplyPriorityFiltering() + { + Date refDate = new Date(); + + // Single article returned as-is + PublicationMatch single = createMatch("111", true, false, false, false, false, refDate); + List result = applyPriorityFiltering(List.of(single), refDate, LOG); + assertEquals(1, result.size()); + + // Articles found by multiple data IDs preferred over single-ID matches + PublicationMatch multiId = createMatch("222", true, true, false, true, true, refDate); + PublicationMatch singleId = createMatch("333", true, false, false, true, true, refDate); + result = applyPriorityFiltering(List.of(singleId, multiId), refDate, LOG); + assertEquals(1, result.size()); + assertEquals("222", result.get(0).getPublicationId()); + + // Among single-ID 
matches, author+title both matching preferred + PublicationMatch bothMatch = createMatch("444", true, false, false, true, true, refDate); + PublicationMatch authorOnly = createMatch("555", true, false, false, true, false, refDate); + result = applyPriorityFiltering(List.of(authorOnly, bothMatch), refDate, LOG); + assertEquals(1, result.size()); + assertEquals("444", result.get(0).getPublicationId()); + } + + @Test + public void testSortByDateProximity() + { + Date refDate = new Date(); + Date closer = new Date(refDate.getTime() - 86400000L); // 1 day before + Date farther = new Date(refDate.getTime() - 86400000L * 365); // 1 year before + + PublicationMatch farMatch = createMatch("111", true, false, false, false, false, farther); + PublicationMatch closeMatch = createMatch("222", true, false, false, false, false, closer); + PublicationMatch noDate = createMatch("333", true, false, false, false, false, null); + + List result = sortByDateProximity(List.of(farMatch, noDate, closeMatch), refDate); + assertEquals("222", result.get(0).getPublicationId()); // closest + assertEquals("111", result.get(1).getPublicationId()); // farther + assertEquals("333", result.get(2).getPublicationId()); // no date last + } + + // -- PublicationMatch round-trip tests -- + + @Test + public void testPublicationMatchRoundTrip() + { + PublicationMatch original = new PublicationMatch("12345", DB.PubMed, true, true, false, true, false, null); + assertEquals("ProteomeXchange ID, Panorama URL, Author", original.getMatchInfo()); + + PublicationMatch restored = PublicationMatch.fromMatchInfo("12345", DB.PubMed, original.getMatchInfo()); + assertTrue(restored.matchesProteomeXchangeId()); + assertTrue(restored.matchesPanoramaUrl()); + assertFalse(restored.matchesDoi()); + assertTrue(restored.matchesAuthor()); + assertFalse(restored.matchesTitle()); + + // All flags + original = new PublicationMatch("67890", DB.PMC, true, true, true, true, true, null); + assertEquals("ProteomeXchange ID, Panorama 
URL, DOI, Author, Title", original.getMatchInfo()); + restored = PublicationMatch.fromMatchInfo("67890", DB.PMC, original.getMatchInfo()); + assertTrue(restored.matchesProteomeXchangeId()); + assertTrue(restored.matchesPanoramaUrl()); + assertTrue(restored.matchesDoi()); + assertTrue(restored.matchesAuthor()); + assertTrue(restored.matchesTitle()); + + // Empty match info + restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, ""); + assertFalse(restored.matchesProteomeXchangeId()); + assertFalse(restored.matchesAuthor()); + + // Null match info + restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, null); + assertFalse(restored.matchesProteomeXchangeId()); + } + + // -- Helper methods for building test JSON -- + + private static JSONObject articleMetadata(String source, String fullJournalName) + { + JSONObject metadata = new JSONObject(); + metadata.put("source", source); + metadata.put("fulljournalname", fullJournalName); + return metadata; + } + + private static JSONObject metadataWithAuthors(String... 
authorNames) + { + JSONObject metadata = new JSONObject(); + JSONArray authors = new JSONArray(); + for (String name : authorNames) + { + authors.put(new JSONObject().put("name", name)); + } + metadata.put("authors", authors); + return metadata; + } + + private static JSONObject metadataWithTitle(String title) + { + JSONObject metadata = new JSONObject(); + metadata.put("title", title); + return metadata; + } + + private static JSONObject metadataWithDate(String field, String dateStr) + { + JSONObject metadata = new JSONObject(); + metadata.put(field, dateStr); + return metadata; + } + + private static PublicationMatch createMatch(String id, boolean pxId, boolean url, boolean doi, + boolean author, boolean title, @Nullable Date pubDate) + { + return new PublicationMatch(id, DB.PubMed, pxId, url, doi, author, title, pubDate); + } + } } From e43a3ee5bd83ecc4bd7f7c39c4b38356c638e2ec Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Tue, 3 Mar 2026 14:25:12 -0800 Subject: [PATCH 17/27] - Added Selenium test - Mock NCBI service for TeamCity tests - Fixed notification messages --- .../PanoramaPublicController.java | 86 +- .../panoramapublic/PanoramaPublicModule.java | 4 +- .../PanoramaPublicNotification.java | 30 +- .../ncbi/NcbiPublicationSearchService.java | 1282 +---------------- .../panoramapublic/ncbi/PublicationMatch.java | 2 +- .../pipeline/PrivateDataReminderJob.java | 2 +- .../PanoramaPublicBaseTest.java | 63 +- .../PrivateDataReminderTest.java | 36 +- .../panoramapublic/PublicationSearchTest.java | 242 ++++ 9 files changed, 407 insertions(+), 1340 deletions(-) create mode 100644 panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index aaec59e8..464cfbc4 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -155,7 +155,9 @@ import org.labkey.panoramapublic.datacite.DoiMetadata; import org.labkey.panoramapublic.message.PrivateDataMessageScheduler; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.MockNcbiPublicationSearchService; import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchServiceImpl; import org.labkey.panoramapublic.ncbi.PublicationMatch; import org.labkey.panoramapublic.model.CatalogEntry; import org.labkey.panoramapublic.model.DataLicense; @@ -183,7 +185,6 @@ import org.labkey.panoramapublic.proteomexchange.Formula; import org.labkey.panoramapublic.ncbi.NcbiConstants; import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; -import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.proteomexchange.NcbiUtils; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeServiceException; @@ -7050,7 +7051,7 @@ public boolean handlePost(PublicationDetailsForm form, BindException errors) { if (form.hasPubmedId()) { - Pair linkAndCitation = NcbiPublicationSearchService.getPubMedLinkAndCitation(form.getPubmedId()); + Pair linkAndCitation = NcbiPublicationSearchService.get().getPubMedLinkAndCitation(form.getPubmedId()); if (linkAndCitation != null) { form.setLink(linkAndCitation.first); @@ -10648,6 +10649,7 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) public static class DismissPublicationSuggestionAction extends UpdateDatasetStatusAction { private PublicationMatch _publicationMatch; + @Override protected String getConfirmViewTitle() { @@ -10657,9 +10659,13 @@ protected String getConfirmViewTitle() @Override protected String getConfirmViewMessage() { + String publicationRef = _publicationMatch.getCitation() != null + ? 
_publicationMatch.getCitation() + : _publicationMatch.getPublicationLabel(); return "You are dismissing the publication suggestion for your data on Panorama Public at " - + _exptAnnotations.getShortUrl().renderShortURL() + - "We will no longer suggest " + _publicationMatch.getPublicationLabel() + " for this dataset."; + + _exptAnnotations.getShortUrl().renderShortURL() + + ". We will no longer suggest the following publication for this dataset - " + + "\n\n" + publicationRef; } @Override @@ -10676,6 +10682,15 @@ else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPublication())) errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() + " has already been dismissed"); } + else + { + // Fetch the citation from NCBI. If we cannot retrieve it, the publication ID will be used as a fallback. + String citation = NcbiPublicationSearchService.get().getCitation(_publicationMatch.getPublicationId(), _publicationMatch.getPublicationType()); + if (!StringUtils.isBlank(citation)) + { + _publicationMatch.setCitation(citation); + } + } } @Override @@ -10690,7 +10705,7 @@ protected void postNotification() // Post a message to the support thread. 
JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(_exptAnnotations); Journal journal = JournalManager.getJournal(submission.getJournalId()); - PanoramaPublicNotification.postPublicationDismissalMessage(journal, submission.getJournalExperiment(), _exptAnnotations, getUser(), _datasetStatus); + PanoramaPublicNotification.postPublicationDismissalMessage(journal, submission.getJournalExperiment(), _exptAnnotations, getUser(), _publicationMatch); } @Override @@ -10724,7 +10739,7 @@ public ModelAndView getView(ExperimentIdForm form, BindException errors) List matches; try { - matches = NcbiPublicationSearchService.searchForPublication(_exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, true); + matches = NcbiPublicationSearchService.get().searchForPublication(_exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, true); } catch (Exception e) { @@ -10820,7 +10835,7 @@ public Object execute(ExperimentIdForm form, BindException errors) try { - List matches = NcbiPublicationSearchService.searchForPublication(expAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, false); + List matches = NcbiPublicationSearchService.get().searchForPublication(expAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, false); response.put("success", true); response.put("papersFound", matches.size()); @@ -10909,6 +10924,17 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws return false; } + // Check if the user has already dismissed this publication suggestion + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); + if (datasetStatus != null && Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication()) + && form.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) + { + errors.reject(ERROR_MSG, "The user has already dismissed the publication suggestion " + + datasetStatus.getPublicationLabel() + " " + datasetStatus.getPotentialPublicationId() 
+ + " for this dataset."); + return false; + } + DB pubType = DB.fromString(form.getPublicationType()); if (pubType == null) { @@ -10918,7 +10944,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws // Reconstruct PublicationMatch from form fields PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, form.getMatchInfo()); - selectedMatch.setCitation(NcbiPublicationSearchService.getCitation(form.getPublicationId(), pubType)); + selectedMatch.setCitation(NcbiPublicationSearchService.get().getCitation(form.getPublicationId(), pubType)); // Post notification JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(exptAnnotations); @@ -10961,7 +10987,6 @@ journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, _announcement, _announcementsContainer, getUser(), selectedMatch); // Update DatasetStatus - DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); if (datasetStatus == null) { datasetStatus = new DatasetStatus(); @@ -11161,6 +11186,49 @@ public static ActionURL getCatalogImageDownloadUrl(ExperimentAnnotations expAnno .addParameter("name", filename); } + // ======================== Test support actions for Selenium tests ======================== + + @RequiresSiteAdmin + public static class SetupMockNcbiServiceAction extends ReadOnlyApiAction + { + @Override + public Object execute(SetupMockForm form, BindException errors) + { + if (form.isCheckNcbiReachable() && NcbiPublicationSearchServiceImpl.isNcbiReachable()) + { + return new ApiSimpleResponse("mock", false); + } + NcbiPublicationSearchServiceImpl.setInstance(new MockNcbiPublicationSearchService()); + return new ApiSimpleResponse("mock", true); + } + } + + public static class SetupMockForm + { + private boolean _checkNcbiReachable; + + public boolean isCheckNcbiReachable() + { + return _checkNcbiReachable; + } + + public void setCheckNcbiReachable(boolean 
checkNcbiReachable) + { + _checkNcbiReachable = checkNcbiReachable; + } + } + + @RequiresSiteAdmin + public static class RestoreNcbiServiceAction extends ReadOnlyApiAction + { + @Override + public Object execute(Object form, BindException errors) + { + NcbiPublicationSearchServiceImpl.setInstance(new NcbiPublicationSearchServiceImpl()); + return new ApiSimpleResponse("restored", true); + } + } + public static class TestCase extends AbstractActionPermissionTest { @Override diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java index 51bb621b..1e18391a 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java @@ -47,7 +47,7 @@ import org.labkey.panoramapublic.bluesky.PanoramaPublicLogoResourceType; import org.labkey.panoramapublic.catalog.CatalogImageAttachmentType; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; -import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchServiceImpl; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.speclib.SpecLibKey; import org.labkey.panoramapublic.pipeline.CopyExperimentPipelineProvider; @@ -383,7 +383,7 @@ public Set getSchemaNames() set.add(CatalogEntryManager.TestCase.class); set.add(BlueskyApiClient.TestCase.class); set.add(PrivateDataReminderSettings.TestCase.class); - set.add(NcbiPublicationSearchService.TestCase.class); + set.add(NcbiPublicationSearchServiceImpl.TestCase.class); return set; } diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index 146575c5..e38a28e6 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -359,7 +359,8 @@ public static void postDataDeletionRequestMessage(@NotNull Journal journal, @Not postNotificationFullTitle(journal, je, messageBody.toString(), journalAdmin, messageTitle, AnnouncementService.StatusOption.Active, null); } - public static void postPublicationDismissalMessage(@NotNull Journal journal, @NotNull JournalExperiment je, @NotNull ExperimentAnnotations expAnnotations, User submitter, @Nullable org.labkey.panoramapublic.model.DatasetStatus datasetStatus) + public static void postPublicationDismissalMessage(@NotNull Journal journal, @NotNull JournalExperiment je, @NotNull ExperimentAnnotations expAnnotations, User submitter, + @NotNull PublicationMatch publicationMatch) { User journalAdmin = JournalManager.getJournalAdminUser(journal); if (journalAdmin == null) @@ -371,14 +372,15 @@ public static void postPublicationDismissalMessage(@NotNull Journal journal, @No StringBuilder messageBody = new StringBuilder(); messageBody.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); - messageBody.append("Thank you for letting us know that the suggested publication is not associated with your data on Panorama Public."); + messageBody.append("Thank you for letting us know that the suggested paper is not associated with your data on Panorama Public."); - if (datasetStatus != null && !StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + messageBody.append(NL2).append(bold("Dismissed Publication:")).append(" ").append(publicationMatch.getPublicationLabel()); + if (!StringUtils.isBlank(publicationMatch.getCitation())) { - messageBody.append(NL2).append(bold("Dismissed Publication:")).append(" ").append(datasetStatus.getPublicationLabel()).append(" ").append(datasetStatus.getPotentialPublicationId()); + messageBody.append(NL).append(publicationMatch.getCitation()); } - messageBody.append(NL2).append("We will no longer suggest this publication for 
your dataset. ") + messageBody.append(NL2).append("We will no longer suggest this paper for your dataset. ") .append("If you would like to make your data public, you can do so at any time ") .append("by clicking the \"Make Public\" button in your data folder, or by clicking this link: ") .append(bold(link("Make Data Public", PanoramaPublicController.getMakePublicUrl(expAnnotations.getId(), expAnnotations.getContainer()).getURIString()))) @@ -397,7 +399,7 @@ public static void postPrivateDataReminderMessage(@NotNull Journal journal, @Not { String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, articleMatch); String title = articleMatch != null - ? "Congratulations! We Found a Publication Associated with Your Data on Panorama Public" + ? "Action Required: Publication Found for Your Data on Panorama Public" : "Action Required: Status Update for Your Private Data on Panorama Public"; postNotificationFullTitle(journal, js.getJournalExperiment(), message, messagePoster, title, AnnouncementService.StatusOption.Closed, notifyUsers); } @@ -435,25 +437,17 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations if (articleMatch != null) { // Message variant when a publication was found - message.append("Great news! We found a publication that appears to be associated with your data on Panorama Public (") - .append(shortUrl).append("), which has been private since ").append(dateString).append(".") + message.append("We found a paper that appears to be associated with your private data on Panorama Public.") .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) .append(NL2).append(bold("Publication Found:")).append(" ") .append(articleMatch.getCitation() != null ? 
link(articleMatch.getCitation(), articleMatch.getPublicationUrl()) : link(articleMatch.getPublicationLabel(), articleMatch.getPublicationUrl())) - .append(NL2).append("Since your work has been published, we encourage you to make your data public so the research community can access it alongside your publication. ") + .append(NL2).append("If this is indeed your paper, congratulations! We encourage you to make your data public so the research community can access it alongside your paper. ") .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") .append(bold(link("Make Data Public", makePublicLink))).append(".") - .append(NL2).append(bold("If this publication is not associated with your data:")) - .append(NL).append("Please let us know by ").append(bold(link("clicking here", dismissPublicationUrl.getURIString()))) - .append(", and we will stop suggesting this publication for your dataset.") - .append(NL2).append(bold("Not ready to make your data public yet?")) - .append(NL).append("If you need more time, you can:") - .append(NL).append(" - ").append(bold(link("Request an Extension", requestExtensionUrl.getURIString()))) - .append(" - This will give you additional time before we send another reminder.") - .append(NL).append(" - ").append(bold(link("Request Deletion", requestDeletionUrl.getURIString()))) - .append(" - If you no longer wish to host your data on Panorama Public."); + .append(NL2).append("If this paper is not associated with your data please let us know by clicking ") + .append(bold(link("Dismiss Publication Suggestion", dismissPublicationUrl.getURIString()))); } else { diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index 6fc747a8..d01e45f2 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ 
b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -1,1295 +1,31 @@ package org.labkey.panoramapublic.ncbi; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.hc.client5.http.classic.methods.HttpGet; -import org.apache.hc.client5.http.config.ConnectionConfig; -import org.apache.hc.client5.http.config.RequestConfig; -import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; -import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; -import org.apache.hc.client5.http.impl.io.BasicHttpClientConnectionManager; -import org.apache.hc.client5.http.HttpResponseException; -import org.apache.hc.core5.http.io.entity.EntityUtils; -import org.apache.hc.core5.util.Timeout; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.junit.Assert; -import org.junit.Test; import org.labkey.api.util.Pair; -import org.labkey.api.util.StringUtilsLabKey; -import org.labkey.api.util.logging.LogHelper; -import org.labkey.panoramapublic.datacite.DataCiteService; import org.labkey.panoramapublic.model.ExperimentAnnotations; import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; -import java.io.IOException; -import java.io.InputStream; -import java.net.HttpURLConnection; -import java.net.URL; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_DOI; -import static 
org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PANORAMA_URL; -import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PX_ID; /** - * Service for searching PubMed Central (PMC) and PubMed for publications associated with private Panorama Public datasets. + * Service for searching PubMed Central (PMC) and PubMed for publications associated with Panorama Public datasets. */ -public class NcbiPublicationSearchService +public interface NcbiPublicationSearchService { - private static final Logger LOG = LogHelper.getLogger(NcbiPublicationSearchService.class, "Search NCBI for publications associated with Panorama Public datasets"); - - // NCBI API endpoints - private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; - private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; - - // NCBI Literature Citation Exporter endpoints - private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; - private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=citation&id="; - - // API parameters - private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec - public static final int MAX_RESULTS = 5; - private static final int TIMEOUT_MS = 10000; // 10 seconds - private static final String NCBI_EMAIL = "panorama@proteinms.net"; - - // Preprint indicators - private static final String[] PREPRINT_INDICATORS = { - "preprint", "biorxiv", "medrxiv", "chemrxiv", "arxiv", - "research square", "preprints.org" - }; - - // Title keyword extraction - private static final int MIN_KEYWORD_LENGTH = 3; - private static final double KEYWORD_MATCH_THRESHOLD = 0.6; // 60% of keywords must match - - // Stop words for title keyword matching. These words are not meaningful discriminators between papers. 
- // Function words: articles, prepositions, conjunctions, auxiliary verbs, pronouns. - // Title fillers: common words in paper titles that are not discriminating. - private static final Set TITLE_STOP_WORDS = Set.of( - - // Function words - "the", "this", "that", "these", "those", "its", // articles / determiners - "for", "with", "from", "into", "upon", "via", "after", "about", // prepositions - "between", "through", "across", "under", "over", "during", - "and", "but", "than", // conjunctions - "are", "was", "were", "been", "have", "has", "can", "may", // auxiliary / modal verbs - "our", "their", // pronouns - "not", "all", "also", "both", "each", "how", "use", "used", // other function words - "here", "well", "two", "one", "more", "most", "only", "such", "other", "which", - - // Title fillers - "using", "based", "reveals", "revealed", "show", "shows", "shown", - "role", "study", "studies" - ); - - private static Logger getLog(@Nullable Logger logger) - { - return logger != null ? logger : LOG; - } - - /** - * Fetches the NLM-style citation for a publication using the NCBI Literature Citation Exporter. - * Supports both PubMed and PMC publication types. - * @param publicationId numeric publication ID (e.g. "28691345") - * @param database PubMed or PMC - * @return the NLM citation string, or null if the lookup fails or the ID is invalid - */ - public static @Nullable String getCitation(String publicationId, DB database) - { - if (publicationId == null || !publicationId.matches(NcbiConstants.PUBMED_ID)) - { - return null; - } - - String baseUrl = database == DB.PMC ? 
PMC_CITATION_EXPORTER_URL : PUBMED_CITATION_EXPORTER_URL; - String queryUrl = baseUrl + publicationId; - - HttpURLConnection conn = null; - try - { - URL url = new URL(queryUrl); - conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - - int status = conn.getResponseCode(); - - if (status == HttpURLConnection.HTTP_OK) - { - String response; - try (InputStream in = conn.getInputStream()) - { - response = IOUtils.toString(in, StandardCharsets.UTF_8); - } - return parseCitation(response, publicationId, database); - } - } - catch (IOException e) - { - LOG.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); - } - finally - { - if (conn != null) conn.disconnect(); - } - return null; - } - - /** - * Does a citation lookup for the given PubMedId using the NCBI's Literature Citation Exporter. - * @param pubmedId PubMed Id - * @return the PubMed link and the NLM-style citation if the lookup is successful - */ - public static Pair getPubMedLinkAndCitation(String pubmedId) - { - String citation = getCitation(pubmedId, DB.PubMed); - return citation != null ? new Pair<>(NcbiConstants.getPubmedLink(pubmedId), citation) : null; - } - - private static String parseCitation(String response, String publicationId, DB database) - { - try - { - var jsonObject = new JSONObject(response); - var nlmInfo = jsonObject.optJSONObject("nlm"); - return null != nlmInfo ? nlmInfo.getString("orig") : null; - } - catch (JSONException e) - { - LOG.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); - } - return null; - } - - /** - * Search for publications associated with the given experiment. 
- * Returns the top result if multiple results are found - * @param logger optional logger; when null, uses the class logger - */ - public static PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) - { - List matches = searchForPublication(expAnnotations, 1, logger, true); - return matches.isEmpty() ? null : matches.get(0); - } - - /** - * Search for publications associated with the given experiment. - * Search PubMed Central first, then fall back to PubMed if needed. - * @param maxResults maximum number of article matches to return (capped at 5) - * @param logger optional logger; when null, uses the class logger - */ - public static List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations) - { - Logger log = getLog(logger); - maxResults = Math.max(1, Math.min(maxResults, MAX_RESULTS)); - log.info("Starting publication search for experiment: {}", expAnnotations.getId()); - - // Search PubMed Central first - List matchedArticles = searchPmc(expAnnotations, log); - - // If no PMC results, fall back to PubMed - if (matchedArticles.isEmpty()) - { - log.info("No PMC articles found, trying PubMed fallback"); - matchedArticles = searchPubMed(expAnnotations, log); - } - - // Build and return result - if (matchedArticles.isEmpty()) - { - log.info("No publications found"); - return Collections.emptyList(); - } - - if (matchedArticles.size() > maxResults) - { - matchedArticles = matchedArticles.subList(0, maxResults); - } - - if (getCitations) - { - // Fetch citations for each match - for (PublicationMatch match : matchedArticles) - { - match.setCitation(getCitation(match.getPublicationId(), match.getPublicationType())); - } - } - - log.info("Returning {}", StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); - return matchedArticles; - } - - /** - * Search PubMed Central - */ - private static @NotNull List searchPmc(@NotNull 
ExperimentAnnotations expAnnotations, Logger log) - { - // Track PMC IDs found by each strategy - Map> pmcIdsByStrategy = new HashMap<>(); - - // Step 1: Search PMC by PX ID - if (!StringUtils.isBlank(expAnnotations.getPxid())) - { - log.debug("Searching PMC by PX ID: {}", expAnnotations.getPxid()); - List ids = searchPmc(quote(expAnnotations.getPxid()), log); - if (!ids.isEmpty()) - { - pmcIdsByStrategy.put(MATCH_PX_ID, ids); - log.debug("Found {} PMC articles by PX ID", ids.size()); - } - rateLimit(); - } - - // Step 2: Search PMC by Panorama URL - if (expAnnotations.getShortUrl() != null) - { - String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); - log.debug("Searching PMC by Panorama URL: {}", panoramaUrl); - List ids = searchPmc(quote(panoramaUrl), log); - if (!ids.isEmpty()) - { - pmcIdsByStrategy.put(MATCH_PANORAMA_URL, ids); - log.debug("Found {} PMC articles by Panorama URL", ids.size()); - } - rateLimit(); - } - - // Step 3: Search PMC by DOI - if (!StringUtils.isBlank(expAnnotations.getDoi())) - { - String doiUrl = expAnnotations.getDoi().startsWith("http") - ? 
expAnnotations.getDoi() - : DataCiteService.toUrl(expAnnotations.getDoi()); - log.debug("Searching PMC by DOI: {}", doiUrl); - List ids = searchPmc(quote(doiUrl), log); - if (!ids.isEmpty()) - { - pmcIdsByStrategy.put(MATCH_DOI, ids); - log.debug("Found {} PMC articles by DOI", ids.size()); - } - rateLimit(); - } - - // Accumulate unique PMC IDs and track which strategies found each - Set uniquePmcIds = new HashSet<>(); - Map> idToStrategies = new HashMap<>(); - - for (Map.Entry> entry : pmcIdsByStrategy.entrySet()) - { - String strategy = entry.getKey(); - for (String id : entry.getValue()) - { - uniquePmcIds.add(id); - idToStrategies.computeIfAbsent(id, k -> new ArrayList<>()).add(strategy); - } - } - - log.info("Total unique PMC IDs found: {}", uniquePmcIds.size()); - - // Fetch and verify PMC articles - List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); - - // Apply priority filtering - return applyPriorityFiltering(pmcArticles, expAnnotations.getCreated(), log); - } - - /** - * Search PubMed Central with the given query - */ - private static List searchPmc(String query, Logger log) - { - return executeSearch(query, "pmc", log); - } - - /** - * Search PubMed with the given query - */ - private static List searchPubMed(String query, Logger log) - { - return executeSearch(query, "pubmed", log); - } - - /** - * Execute search using NCBI ESearch API - */ - private static List executeSearch(String query, String database, Logger log) - { - String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); - String url = ESEARCH_URL + - "?db=" + database + - "&term=" + encodedQuery + - "&retmax=" + MAX_RESULTS + - "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); - - try - { - JSONObject json = getJson(url); - JSONObject eSearchResult = json.getJSONObject("esearchresult"); - JSONArray idList = eSearchResult.getJSONArray("idlist"); - - List ids = new ArrayList<>(); - for (int i = 0; 
i < idList.length(); i++) - { - ids.add(idList.getString(i)); - } - return ids; - } - catch (IOException | JSONException e) - { - log.error("Error searching {} with query: {}", database, query, e); - return Collections.emptyList(); - } - } - - /** - * Execute an HTTP GET request and parse the response as JSON. - * @throws IOException if the request fails or the server returns a non-2xx response - * @throws JSONException if the response body is not valid JSON - */ - private static JSONObject getJson(String url) throws IOException - { - ConnectionConfig connectionConfig = ConnectionConfig.custom() - .setConnectTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) - .setSocketTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) - .build(); - - RequestConfig requestConfig = RequestConfig.custom() - .setResponseTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) - .build(); - - BasicHttpClientConnectionManager connectionManager = new BasicHttpClientConnectionManager(); - connectionManager.setConnectionConfig(connectionConfig); - - try (CloseableHttpClient client = HttpClientBuilder.create() - .setDefaultRequestConfig(requestConfig) - .setConnectionManager(connectionManager) - .build()) - { - HttpGet get = new HttpGet(url); - return client.execute(get, response -> { - int status = response.getCode(); - if (status < 200 || status >= 300) - { - throw new HttpResponseException(status, response.getReasonPhrase()); - } - String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); - return new JSONObject(body); - }); - } - } - - /** - * Fetch metadata for PMC articles using ESummary API - */ - private static Map fetchPmcMetadata(Collection pmcIds, Logger log) - { - return fetchMetadata(pmcIds, "pmc", log); - } - - /** - * Fetch metadata for PubMed articles using ESummary API - */ - private static Map fetchPubMedMetadata(Collection pmids, Logger log) - { - return fetchMetadata(pmids, "pubmed", log); - } - - /** - * Fetch metadata using NCBI ESummary API (batch request) - */ - 
private static Map fetchMetadata(Collection ids, String database, Logger log) - { - if (ids.isEmpty()) return Collections.emptyMap(); - - String idString = String.join(",", ids); - String url = ESUMMARY_URL + - "?db=" + database + - "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + - "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); - - try - { - JSONObject json = getJson(url); - JSONObject result = json.optJSONObject("result"); - if (result == null) return Collections.emptyMap(); - - // Extract metadata for each ID - Map metadata = new HashMap<>(); - for (String id : ids) - { - JSONObject articleData = result.optJSONObject(id); - if (articleData != null) - { - metadata.put(id, articleData); - } - } - return metadata; - } - catch (IOException | JSONException e) - { - log.error("Error fetching {} metadata for IDs: {}", database, ids, e); - return Collections.emptyMap(); - } - } - - /** - * Fetch and verify PMC articles (filter preprints, check author/title matches) - */ - private static List fetchAndVerifyPmcArticles( - Set pmcIds, - Map> idToStrategies, - ExperimentAnnotations expAnnotations, - Logger log) - { - if (pmcIds.isEmpty()) return Collections.emptyList(); - - // Fetch all metadata in batch - Map metadata = fetchPmcMetadata(pmcIds, log); - - List articles = new ArrayList<>(); - String firstName = expAnnotations.getSubmitterUser() != null - ? expAnnotations.getSubmitterUser().getFirstName() : null; - String lastName = expAnnotations.getSubmitterUser() != null - ? 
expAnnotations.getSubmitterUser().getLastName() : null; - - for (String pmcId : pmcIds) - { - JSONObject articleData = metadata.get(pmcId); - if (articleData == null) continue; - - // Filter out preprints - if (isPreprint(articleData)) - { - String source = articleData.optString("source", ""); - String journal = articleData.optString("fulljournalname", ""); - log.info("Excluded preprint PMC{}: source=\"{}\", journal=\"{}\"", pmcId, source, journal); - continue; - } - - // Check author and title matches - boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); - boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); - - // Extract PubMed ID, is found - String pubMedId = extractPubMedId(articleData); - String publicationId = pubMedId == null ? pmcId : pubMedId; - DB publicationType = pubMedId == null ? DB.PMC : DB.PubMed; - - // Get strategies that found this article - List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); - - // Map strategies to boolean flags - boolean foundByPxId = strategies.contains(MATCH_PX_ID); - boolean foundByUrl = strategies.contains(MATCH_PANORAMA_URL); - boolean foundByDoi = strategies.contains(MATCH_DOI); - - Date pubDate = parsePublicationDate(articleData, log); - - PublicationMatch article = new PublicationMatch( - publicationId, - publicationType, - foundByPxId, - foundByUrl, - foundByDoi, - authorMatch, - titleMatch, - pubDate - ); - articles.add(article); - - log.info("PMC{} -> {} {} | Match info: {}", pmcId, publicationType, publicationId, article.getMatchInfo()); - } - - return articles; - } - - /** - * Apply priority filtering to narrow down results - */ - private static List applyPriorityFiltering(List articles, @NotNull Date referenceDate, Logger log) - { - if (articles.size() <= 1) return articles; - - // Priority 1: Filter to articles found by multiple data IDs (most reliable) - List multipleIds = articles.stream() - .filter(a -> countDataIdMatches(a) >= 2) - 
.collect(Collectors.toList()); - - if (!multipleIds.isEmpty()) - { - log.debug("Filtered to {} article(s) found by multiple IDs", multipleIds.size()); - articles = multipleIds; - } - - if (articles.size() <= 1) return sortByDateProximity(articles, referenceDate); - - // Priority 2: Filter to articles with both Author AND Title match - List bothMatches = articles.stream() - .filter(a -> a.matchesAuthor() && a.matchesTitle()) - .collect(Collectors.toList()); - - if (!bothMatches.isEmpty()) - { - log.debug("Filtered to {} article(s) with both Author and Title match", bothMatches.size()); - articles = bothMatches; - } - - return sortByDateProximity(articles, referenceDate); - } - - /** - * Sort articles by proximity of publication date to the reference date (experiment creation date). - * Articles with dates closest to the reference date come first. - * Articles without a publication date are sorted to the end. - */ - private static List sortByDateProximity(List articles, @NotNull Date referenceDate) - { - if (articles.size() <= 1) - { - return articles; - } - long refTime = referenceDate.getTime(); - articles = new ArrayList<>(articles); - articles.sort(Comparator.comparingLong(a -> - a.getPublicationDate() != null ? Math.abs(a.getPublicationDate().getTime() - refTime) : Long.MAX_VALUE - )); - return articles; - } - - private static int countDataIdMatches(PublicationMatch a) - { - int count = 0; - if (a.matchesProteomeXchangeId()) count++; - if (a.matchesPanoramaUrl()) count++; - if (a.matchesDoi()) count++; - return count; - } - - /** - * Fall back to PubMed search if PMC finds nothing - */ - private static List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) - { - String firstName = expAnnotations.getSubmitterUser() != null - ? expAnnotations.getSubmitterUser().getFirstName() : null; - String lastName = expAnnotations.getSubmitterUser() != null - ? 
expAnnotations.getSubmitterUser().getLastName() : null; - String title = expAnnotations.getTitle(); - - if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName) || StringUtils.isBlank(title)) - { - log.info("Cannot perform PubMed fallback - missing author or title information"); - return Collections.emptyList(); - } - - // Search PubMed: "LastName FirstName[Author] AND Title NOT preprint[Publication Type]" - String query = String.format("%s %s[Author] AND %s NOT preprint[Publication Type]", - lastName, firstName, title); - - log.debug("PubMed fallback query: {}", query); - List pmids = searchPubMed(query, log); - - if (pmids.isEmpty()) - { - log.info("PubMed fallback found no results"); - return Collections.emptyList(); - } - - log.info("PubMed fallback found {} result(s), verifying...", pmids.size()); - - // Fetch metadata and verify - Map metadata = fetchPubMedMetadata(pmids, log); - - List articles = new ArrayList<>(); - for (String pmid : pmids) - { - JSONObject articleData = metadata.get(pmid); - if (articleData == null) continue; - - // Filter out preprints - if (isPreprint(articleData)) - { - log.info("Excluded preprint PMID {}", pmid); - continue; - } - - // Verify both author AND title match - boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); - boolean titleMatch = checkTitleMatch(articleData, title); - - // Only accept if BOTH match - if (authorMatch && titleMatch) - { - Date pubDate = parsePublicationDate(articleData, log); - PublicationMatch article = new PublicationMatch( - pmid, - DB.PubMed, - false, // Not found by PX ID - false, // Not found by Panorama URL - false, // Not found by DOI - true, // Matched by author - true, // Matched by title - pubDate - ); - articles.add(article); - log.info("PMID {} verified with both Author and Title match", pmid); - } - } - - log.info("Verified {} of {} PubMed results", articles.size(), pmids.size()); - return articles; - } - - /** - * Check if article is a preprint - */ - 
private static boolean isPreprint(JSONObject metadata) - { - String source = metadata.optString("source", "").toLowerCase(); - String journal = metadata.optString("fulljournalname", "").toLowerCase(); - - for (String indicator : PREPRINT_INDICATORS) - { - if (source.contains(indicator) || journal.contains(indicator)) - { - return true; - } - } - return false; - } - - /** - * Check if article matches the author (submitter) - */ - private static boolean checkAuthorMatch(JSONObject metadata, String firstName, String lastName) - { - if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName)) - { - return false; - } - - JSONArray authors = metadata.optJSONArray("authors"); - if (authors == null || authors.isEmpty()) - { - return false; - } - - String firstInitial = firstName.substring(0, 1).toLowerCase(); - String lastNameLower = lastName.toLowerCase(); - - for (int i = 0; i < authors.length(); i++) - { - JSONObject author = authors.optJSONObject(i); - if (author == null) continue; - - String authorName = author.optString("name", "").toLowerCase(); - - // Match format: "LastName FirstInitial" or "LastName F" - if (authorName.startsWith(lastNameLower)) - { - String afterLastName = authorName.substring(lastNameLower.length()).trim(); - if (afterLastName.startsWith(firstInitial)) - { - return true; - } - } - } - - return false; - } - - /** - * Check if article title matches the dataset title - */ - private static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) - { - if (StringUtils.isBlank(datasetTitle)) - { - return false; - } - - // Try title first, then sorttitle as fallback - String title = normalizeTitle(metadata.optString("title")); - String normalizedArticleTitle = title.isEmpty() - ? 
normalizeTitle(metadata.optString("sorttitle")) - : title; - - if (normalizedArticleTitle.isEmpty()) - { - return false; - } - - String normalizedDatasetTitle = normalizeTitle(datasetTitle); - - // Try exact match first - if (normalizedArticleTitle.equals(normalizedDatasetTitle)) - { - return true; - } - - // Bi-directional keyword matching: match if either direction meets the threshold. - // This handles cases where one title is much more specific than the other. - return keywordsMatch(datasetTitle, normalizedArticleTitle) - || keywordsMatch(metadata.optString("title", metadata.optString("sorttitle", "")), normalizedDatasetTitle); - } - - /** - * Check if keywords extracted from {@code sourceTitle} are present in {@code normalizedTarget}. - * Returns true if at least 60% of keywords match (with a minimum of 2). - */ - private static boolean keywordsMatch(String sourceTitle, String normalizedTarget) - { - List keywords = extractTitleKeywords(sourceTitle); - if (keywords.isEmpty()) - { - return false; - } - - long matchCount = keywords.stream() - .filter(keyword -> normalizedTarget.contains(keyword)) - .count(); - - int required = (int) Math.ceil(keywords.size() * KEYWORD_MATCH_THRESHOLD); - required = Math.max(required, Math.min(2, keywords.size())); - return matchCount >= required; - } - - /** - * Normalize title for comparison (lowercase, remove punctuation, normalize whitespace) - */ - private static String normalizeTitle(String title) - { - if (title == null) return ""; - - return title.toLowerCase() - .replaceAll("[^\\w\\s]", "") // Remove punctuation - .replaceAll("\\s+", " ") // Normalize whitespace - .trim(); - } - - /** - * Extract meaningful keywords from title for matching. - * Returns all qualifying words (no cap) — the caller uses a percentage threshold. 
- */ - private static List extractTitleKeywords(String title) - { - if (StringUtils.isBlank(title)) - { - return Collections.emptyList(); - } - - String[] words = title.toLowerCase().split("\\s+"); - List keywords = new ArrayList<>(); - - for (String word : words) - { - // Remove non-alphanumeric characters - word = word.replaceAll("[^a-z0-9]", ""); - - // Check length and exclude stop words - if (word.length() >= MIN_KEYWORD_LENGTH && !TITLE_STOP_WORDS.contains(word)) - { - keywords.add(word); - } - } - - return keywords; - } - - /** - * Extract PMID from PMC metadata - */ - private static @Nullable String extractPubMedId(JSONObject pmcMetadata) - { - // PMC articles contain PMID in articleids array - JSONArray articleIds = pmcMetadata.optJSONArray("articleids"); - if (articleIds != null) - { - for (int i = 0; i < articleIds.length(); i++) - { - JSONObject idObj = articleIds.optJSONObject(i); - if (idObj != null && "pmid".equals(idObj.optString("idtype"))) - { - return idObj.optString("value"); - } - } - } - - return null; - } - - /** - * Parse publication date from ESummary metadata. - * Tries "pubdate" first, then "epubdate". Dates are typically in "YYYY Mon DD" or "YYYY Mon" format. 
- */ - private static @Nullable Date parsePublicationDate(JSONObject metadata, Logger log) - { - String dateStr = metadata.optString("pubdate", ""); - if (StringUtils.isBlank(dateStr)) - { - dateStr = metadata.optString("epubdate", ""); - } - if (StringUtils.isBlank(dateStr)) - { - return null; - } - - // NCBI dates are typically "YYYY Mon DD", "YYYY Mon", or "YYYY" - String[] formats = {"yyyy MMM dd", "yyyy MMM", "yyyy"}; - for (String format : formats) - { - try - { - return new SimpleDateFormat(format).parse(dateStr); - } - catch (ParseException ignored) - { - } - } - log.debug("Unable to parse publication date: {}", dateStr); - return null; - } - - /** - * Rate limiting: wait 400ms between API requests - */ - private static void rateLimit() - { - try - { - Thread.sleep(RATE_LIMIT_DELAY_MS); - } - catch (InterruptedException e) - { - Thread.currentThread().interrupt(); - } - } + int MAX_RESULTS = 5; - /** - * Wrap string in quotes for exact match search - */ - private static String quote(String str) + static NcbiPublicationSearchService get() { - return "\"" + str + "\""; + return NcbiPublicationSearchServiceImpl.getInstance(); } - public static class TestCase extends Assert - { - // -- parseCitation tests -- - - @Test - public void testParseCitation() - { - // Valid NLM citation response - String json = "{\"nlm\":{\"orig\":\"Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. PMID: 23689285; PMCID: PMC3769335.\"}}"; - assertEquals("Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. 
PMID: 23689285; PMCID: PMC3769335.", parseCitation(json, "23689285", DB.PubMed)); - - // Missing nlm key - assertEquals(null, parseCitation("{\"ama\":{\"orig\":\"something\"}}", "23689285", DB.PubMed)); - - // Malformed JSON - assertEquals(null, parseCitation("not json", "23689285", DB.PubMed)); - - // Empty nlm object (missing "orig" key) - assertEquals(null, parseCitation("{\"nlm\":{}}", "23689285", DB.PubMed)); - } - - // -- isPreprint tests -- - - @Test - public void testIsPreprint() - { - // Non-preprint - assertFalse(isPreprint(articleMetadata("J Proteome Res", "Journal of Proteome Research"))); - - // Preprint by source - assertTrue(isPreprint(articleMetadata("bioRxiv", "bioRxiv"))); - assertTrue(isPreprint(articleMetadata("medRxiv", ""))); - - // Preprint by journal name - assertTrue(isPreprint(articleMetadata("", "Research Square"))); - assertTrue(isPreprint(articleMetadata("", "ChemRxiv preprint"))); - - // Case insensitive - assertTrue(isPreprint(articleMetadata("BIORXIV", ""))); - - // Empty fields - assertFalse(isPreprint(articleMetadata("", ""))); - } + @Nullable String getCitation(String publicationId, DB database); - // -- checkAuthorMatch tests -- + @Nullable Pair getPubMedLinkAndCitation(String pubmedId); - @Test - public void testCheckAuthorMatch() - { - // Standard match: "Sharma V" matches firstName=Vagisha, lastName=Sharma - JSONObject metadata = metadataWithAuthors("Sharma V", "Jones AB", "Smith CD"); - assertTrue(checkAuthorMatch(metadata, "Vagisha", "Sharma")); + @Nullable PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger); - // Match is case-insensitive - assertTrue(checkAuthorMatch(metadataWithAuthors("sharma v"), "Vagisha", "Sharma")); - assertTrue(checkAuthorMatch(metadataWithAuthors("SHARMA V"), "vagisha", "sharma")); - - // No matching author - assertFalse(checkAuthorMatch(metadataWithAuthors("Jones AB", "Smith CD"), "Vagisha", "Sharma")); - - // Blank first or last name - 
assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "", "Sharma")); - assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "Vagisha", "")); - assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), null, "Sharma")); - - // Empty authors array - assertFalse(checkAuthorMatch(new JSONObject(), "Vagisha", "Sharma")); - - // Author with full first name: "Sharma Vagisha" - assertTrue(checkAuthorMatch(metadataWithAuthors("Sharma Vagisha"), "Vagisha", "Sharma")); - } - - // -- checkTitleMatch tests -- - - @Test - public void testCheckTitleMatchExact() - { - // Exact match (case-insensitive, punctuation-stripped) - JSONObject metadata = metadataWithTitle("Quantitative Proteomics of Muscle Fibers"); - assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers")); - assertTrue(checkTitleMatch(metadata, "quantitative proteomics of muscle fibers")); - assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers!")); - } - - @Test - public void testCheckTitleMatchKeywords() - { - // Article: "quantitative proteomics reveals muscle fibers composition" - // Dataset: "muscle fibers composition proteomics quantitative patterns" - // Dataset keywords: "muscle", "fibers", "composition", "proteomics", "quantitative", "patterns" (6 keywords) - // Matches in article: "muscle", "fibers", "composition", "proteomics", "quantitative" (5 of 6 = 83%) -> pass - JSONObject metadata = metadataWithTitle("A study of quantitative proteomics reveals muscle fibers composition"); - assertTrue(checkTitleMatch(metadata, "muscle fibers composition proteomics quantitative patterns")); - - // Keywords not present in article title — well below 60% - assertFalse(checkTitleMatch(metadata, "Novel cardiac lipids quantitation in mouse tissue")); - } - - @Test - public void testCheckTitleMatchThreshold() - { - // Article contains: "phosphoproteomics", "analysis", "lung", "cancer", "cell", "lines" - JSONObject metadata = 
metadataWithTitle("Phosphoproteomics analysis of lung cancer cell lines"); - - // 4 of 5 keywords match (80%) -> pass (threshold is 60%) - assertTrue(checkTitleMatch(metadata, "phosphoproteomics lung cancer cell biomarkers")); - - // Only 2 of 5 keywords match (40%) -> fail - assertFalse(checkTitleMatch(metadata, "phosphoproteomics kidney heart liver cancer")); - } - - @Test - public void testCheckTitleMatchShortKeywords() - { - // Short but meaningful words (3-4 chars) should be extracted as keywords - JSONObject metadata = metadataWithTitle("DIA proteomics of lung cell iron metabolism in mice"); - assertTrue(checkTitleMatch(metadata, "DIA lung cell iron mice proteomics")); - } - - @Test - public void testCheckTitleMatchBidirectional() - { - // Forward direction fails: dataset is very specific, article is short. - // Dataset keywords: "comprehensive", "phosphoproteomics", "analysis", "novel", "biomarkers", "lung", "cancer", "cell", "lines" (9) - // Only "lung", "cancer" found in article (2 of 9 = 22%) -> forward fails - // - // Reverse direction passes: article keywords: "lung", "cancer", "proteomics" (3) - // All 3 found in dataset -> 100% -> reverse passes - JSONObject metadata = metadataWithTitle("Lung cancer proteomics"); - assertTrue(checkTitleMatch(metadata, - "Comprehensive phosphoproteomics analysis reveals novel biomarkers in lung cancer cell lines")); - - // Both directions fail — completely unrelated titles - assertFalse(checkTitleMatch(metadataWithTitle("Cardiac tissue lipidomics"), - "Hepatic transcriptomics in zebrafish embryos")); - } - - @Test - public void testCheckTitleMatchEdgeCases() - { - // Blank dataset title - assertFalse(checkTitleMatch(metadataWithTitle("Some article"), "")); - assertFalse(checkTitleMatch(metadataWithTitle("Some article"), null)); - - // Missing title in metadata - assertFalse(checkTitleMatch(new JSONObject(), "Some title")); - - // Dataset title with only stop words and short words below MIN_KEYWORD_LENGTH - // "a the 
and for with" -> all stop words or < 3 chars -> no keywords - assertFalse(checkTitleMatch(metadataWithTitle("Different title entirely"), "a the and for with")); - } - - @Test - public void testCheckTitleMatchSorttitleFallback() - { - // When "title" is missing, falls back to "sorttitle" - JSONObject metadata = new JSONObject(); - metadata.put("sorttitle", "phosphoproteomics of lung cancer"); - assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); - - // When "title" is present, it is used (not "sorttitle") - metadata = new JSONObject(); - metadata.put("title", "Phosphoproteomics of Lung Cancer"); - metadata.put("sorttitle", "completely different sort title"); - assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); - } - - // -- extractTitleKeywords tests -- - - @Test - public void testExtractTitleKeywords() - { - // Normal title — all non-stop words >= 3 chars are extracted - List keywords = extractTitleKeywords("Novel phosphoproteomics workflow for cardiac tissue samples"); - assertTrue(keywords.contains("novel")); - assertTrue(keywords.contains("phosphoproteomics")); - assertTrue(keywords.contains("workflow")); - assertTrue(keywords.contains("cardiac")); - assertTrue(keywords.contains("tissue")); - assertTrue(keywords.contains("samples")); - assertFalse("'for' is too short", keywords.contains("for")); - - // Domain-specific words are NOT stop words — they should be kept - keywords = extractTitleKeywords("Quantitative proteomics characterization identification analysis"); - assertTrue("'proteomics' is domain-specific, not a stop word", keywords.contains("proteomics")); - assertTrue("'characterization' is domain-specific", keywords.contains("characterization")); - assertTrue("'identification' is domain-specific", keywords.contains("identification")); - assertTrue("'analysis' is domain-specific", keywords.contains("analysis")); - assertTrue(keywords.contains("quantitative")); - - // Function words and title fillers are 
excluded - keywords = extractTitleKeywords("the study using based reveals role"); - assertFalse("'the' is a function word", keywords.contains("the")); - assertFalse("'study' is a title filler", keywords.contains("study")); - assertFalse("'using' is a title filler", keywords.contains("using")); - assertFalse("'based' is a title filler", keywords.contains("based")); - assertFalse("'reveals' is a title filler", keywords.contains("reveals")); - assertFalse("'role' is a title filler", keywords.contains("role")); - - // Short meaningful words (>= 3 chars) are kept - keywords = extractTitleKeywords("DIA analysis of lung cell iron metabolism in mice"); - assertTrue("'dia' (3 chars) should be kept", keywords.contains("dia")); - assertTrue("'lung' (4 chars) should be kept", keywords.contains("lung")); - assertTrue("'cell' (4 chars) should be kept", keywords.contains("cell")); - assertTrue("'iron' (4 chars) should be kept", keywords.contains("iron")); - assertTrue("'mice' (4 chars) should be kept", keywords.contains("mice")); - assertFalse("'of' (2 chars) is too short", keywords.contains("of")); - assertFalse("'in' (2 chars) is too short", keywords.contains("in")); - - // No cap on number of keywords - keywords = extractTitleKeywords("alpha bravo charlie delta foxtrot hotel india juliet kilo lima"); - assertEquals(10, keywords.size()); - - // Blank input - assertTrue(extractTitleKeywords("").isEmpty()); - assertTrue(extractTitleKeywords(null).isEmpty()); - } - - // -- normalizeTitle tests -- - - @Test - public void testNormalizeTitle() - { - assertEquals("hello world", normalizeTitle("Hello, World!")); - assertEquals("testdriven development", normalizeTitle("Test-Driven Development")); - assertEquals("multiple spaces become one", normalizeTitle(" Multiple spaces become one ")); - assertEquals("", normalizeTitle(null)); - assertEquals("", normalizeTitle("")); - } - - // -- extractPubMedId tests -- - - @Test - public void testExtractPubMedId() - { - // PMC metadata with PMID - 
JSONObject metadata = new JSONObject(); - JSONArray articleIds = new JSONArray(); - articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); - articleIds.put(new JSONObject().put("idtype", "pmid").put("value", "28691345")); - metadata.put("articleids", articleIds); - assertEquals("28691345", extractPubMedId(metadata)); - - // No PMID in articleids - metadata = new JSONObject(); - articleIds = new JSONArray(); - articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); - metadata.put("articleids", articleIds); - assertNull(extractPubMedId(metadata)); - - // No articleids key - assertNull(extractPubMedId(new JSONObject())); - } - - // -- parsePublicationDate tests -- - - @Test - public void testParsePublicationDate() - { - // "YYYY Mon DD" format - assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan 15"), LOG)); - - // "YYYY Mon" format - assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan"), LOG)); - - // "YYYY" format - assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024"), LOG)); - - // Falls back to epubdate when pubdate is blank - assertNotNull(parsePublicationDate(metadataWithDate("epubdate", "2024 Mar 01"), LOG)); - - // Unparseable date - assertNull(parsePublicationDate(metadataWithDate("pubdate", "not-a-date"), LOG)); - - // Empty date fields - assertNull(parsePublicationDate(new JSONObject(), LOG)); - } - - // -- applyPriorityFiltering tests -- - - @Test - public void testApplyPriorityFiltering() - { - Date refDate = new Date(); - - // Single article returned as-is - PublicationMatch single = createMatch("111", true, false, false, false, false, refDate); - List result = applyPriorityFiltering(List.of(single), refDate, LOG); - assertEquals(1, result.size()); - - // Articles found by multiple data IDs preferred over single-ID matches - PublicationMatch multiId = createMatch("222", true, true, false, true, true, refDate); - PublicationMatch singleId 
= createMatch("333", true, false, false, true, true, refDate); - result = applyPriorityFiltering(List.of(singleId, multiId), refDate, LOG); - assertEquals(1, result.size()); - assertEquals("222", result.get(0).getPublicationId()); - - // Among single-ID matches, author+title both matching preferred - PublicationMatch bothMatch = createMatch("444", true, false, false, true, true, refDate); - PublicationMatch authorOnly = createMatch("555", true, false, false, true, false, refDate); - result = applyPriorityFiltering(List.of(authorOnly, bothMatch), refDate, LOG); - assertEquals(1, result.size()); - assertEquals("444", result.get(0).getPublicationId()); - } - - @Test - public void testSortByDateProximity() - { - Date refDate = new Date(); - Date closer = new Date(refDate.getTime() - 86400000L); // 1 day before - Date farther = new Date(refDate.getTime() - 86400000L * 365); // 1 year before - - PublicationMatch farMatch = createMatch("111", true, false, false, false, false, farther); - PublicationMatch closeMatch = createMatch("222", true, false, false, false, false, closer); - PublicationMatch noDate = createMatch("333", true, false, false, false, false, null); - - List result = sortByDateProximity(List.of(farMatch, noDate, closeMatch), refDate); - assertEquals("222", result.get(0).getPublicationId()); // closest - assertEquals("111", result.get(1).getPublicationId()); // farther - assertEquals("333", result.get(2).getPublicationId()); // no date last - } - - // -- PublicationMatch round-trip tests -- - - @Test - public void testPublicationMatchRoundTrip() - { - PublicationMatch original = new PublicationMatch("12345", DB.PubMed, true, true, false, true, false, null); - assertEquals("ProteomeXchange ID, Panorama URL, Author", original.getMatchInfo()); - - PublicationMatch restored = PublicationMatch.fromMatchInfo("12345", DB.PubMed, original.getMatchInfo()); - assertTrue(restored.matchesProteomeXchangeId()); - assertTrue(restored.matchesPanoramaUrl()); - 
assertFalse(restored.matchesDoi()); - assertTrue(restored.matchesAuthor()); - assertFalse(restored.matchesTitle()); - - // All flags - original = new PublicationMatch("67890", DB.PMC, true, true, true, true, true, null); - assertEquals("ProteomeXchange ID, Panorama URL, DOI, Author, Title", original.getMatchInfo()); - restored = PublicationMatch.fromMatchInfo("67890", DB.PMC, original.getMatchInfo()); - assertTrue(restored.matchesProteomeXchangeId()); - assertTrue(restored.matchesPanoramaUrl()); - assertTrue(restored.matchesDoi()); - assertTrue(restored.matchesAuthor()); - assertTrue(restored.matchesTitle()); - - // Empty match info - restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, ""); - assertFalse(restored.matchesProteomeXchangeId()); - assertFalse(restored.matchesAuthor()); - - // Null match info - restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, null); - assertFalse(restored.matchesProteomeXchangeId()); - } - - // -- Helper methods for building test JSON -- - - private static JSONObject articleMetadata(String source, String fullJournalName) - { - JSONObject metadata = new JSONObject(); - metadata.put("source", source); - metadata.put("fulljournalname", fullJournalName); - return metadata; - } - - private static JSONObject metadataWithAuthors(String... 
authorNames) - { - JSONObject metadata = new JSONObject(); - JSONArray authors = new JSONArray(); - for (String name : authorNames) - { - authors.put(new JSONObject().put("name", name)); - } - metadata.put("authors", authors); - return metadata; - } - - private static JSONObject metadataWithTitle(String title) - { - JSONObject metadata = new JSONObject(); - metadata.put("title", title); - return metadata; - } - - private static JSONObject metadataWithDate(String field, String dateStr) - { - JSONObject metadata = new JSONObject(); - metadata.put(field, dateStr); - return metadata; - } - - private static PublicationMatch createMatch(String id, boolean pxId, boolean url, boolean doi, - boolean author, boolean title, @Nullable Date pubDate) - { - return new PublicationMatch(id, DB.PubMed, pxId, url, doi, author, title, pubDate); - } - } + List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations); } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index 6591b27d..52fb9bc0 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -125,7 +125,7 @@ public String getPublicationUrl() public String getPublicationLabel() { - return _publicationType.getLabel() + " " + _publicationId; + return _publicationType.getLabel() + " ID " + _publicationId; } public JSONObject toJson() diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index d3bd267b..8aef9276 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -220,7 +220,7 @@ private 
static PublicationMatch searchForPublication(@NotNull ExperimentAnnotati log.info(String.format("Searching for publications for experiment %d", expAnnotations.getId())); try { - return NcbiPublicationSearchService.searchForPublication(expAnnotations, log); + return NcbiPublicationSearchService.get().searchForPublication(expAnnotations, log); } catch (Exception e) { diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java index 9dc6eea8..28b5b520 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java @@ -223,10 +223,15 @@ void setupSubfolder(String projectName, String parentFolderName, String folderNa } void updateSubmitterAccountInfo(String lastName) + { + updateSubmitterAccountInfo(lastName, null); + } + + void updateSubmitterAccountInfo(String lastName, String firstName) { goToMyAccount(); clickButton("Edit"); - setFormElement(Locator.name("quf_FirstName"), "Submitter"); + setFormElement(Locator.name("quf_FirstName"), firstName != null ? 
firstName : "Submitter"); setFormElement(Locator.name("quf_LastName"), lastName); clickButton("Submit"); } @@ -490,20 +495,27 @@ public File getSampleDataPath(String file) return TestFileUtils.getSampleData("TargetedMS/" + getSampleDataFolder() + file); } - protected String setupFolderSubmitAndCopy(String projectName, String folderName, String targetFolder, String experimentTitle, String submitter, @Nullable String submitterLastName, + protected String setupFolderSubmitAndCopy(String projectName, String folderName, String targetFolder, String experimentTitle, String submitterEmail, @Nullable String submitterLastName, + @Nullable String admin, String skylineDocName) + { + return setupFolderSubmitAndCopy(projectName, folderName, targetFolder, experimentTitle, submitterEmail, submitterLastName, null, admin, skylineDocName); + } + + protected String setupFolderSubmitAndCopy(String projectName, String folderName, String targetFolder, String experimentTitle, String submitterEmail, + @Nullable String submitterLastName, @Nullable String submitterFirstName, @Nullable String admin, String skylineDocName) { - setupSourceFolder(projectName, folderName, submitter); + setupSourceFolder(projectName, folderName, submitterEmail); if (admin != null) { createFolderAdmin(projectName, folderName, admin); } - impersonate(submitter); + impersonate(submitterEmail); if (submitterLastName != null) { - updateSubmitterAccountInfo(submitterLastName); + updateSubmitterAccountInfo(submitterLastName, submitterFirstName); } // Import a Skyline document to the folder @@ -603,6 +615,47 @@ protected void verifyIsPublicColumn(String panoramaPublicProject, String experim assertEquals(isPublic ? 
"Yes" : "No", expListTable.getDataAsText(0, "Public")); } + protected void savePrivateDataReminderSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency) + { + savePrivateDataReminderSettings(extensionLength, delayUntilFirstReminder, reminderFrequency, false); + } + + protected void savePrivateDataReminderSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency, boolean enablePublicationSearch) + { + goToAdminConsole().goToSettingsSection(); + clickAndWait(Locator.linkWithText("Panorama Public")); + clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); + + setFormElement(Locator.input("delayUntilFirstReminder"), delayUntilFirstReminder); + setFormElement(Locator.input("reminderFrequency"), reminderFrequency); + setFormElement(Locator.input("extensionLength"), extensionLength); + if (enablePublicationSearch) + { + checkCheckbox(Locator.checkboxByName("enablePublicationSearch")); + } + else + { + uncheckCheckbox(Locator.checkboxByName("enablePublicationSearch")); + } + clickButton("Save", 0); + waitForText("Private data reminder settings saved"); + clickAndWait(Locator.linkWithText("Back to Private Data Reminder Settings")); + + assertEquals(String.valueOf(delayUntilFirstReminder), getFormElement(Locator.input("delayUntilFirstReminder"))); + assertEquals(String.valueOf(reminderFrequency), getFormElement(Locator.input("reminderFrequency"))); + assertEquals(String.valueOf(extensionLength), getFormElement(Locator.input("extensionLength"))); + } + + protected void goToSendRemindersPage(String projectName) + { + goToAdminConsole().goToSettingsSection(); + clickAndWait(Locator.linkWithText("Panorama Public")); + clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); + selectOptionByText(Locator.name("journal"), projectName); + clickAndWait(Locator.linkWithText("Send Reminders Now")); + waitForText(projectName, "A reminder message will be sent to the submitters of the selected 
experiments"); + } + @Override protected void doCleanup(boolean afterTest) throws TestTimeoutException { diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java index 0ead1b8a..50e34e25 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java @@ -129,7 +129,7 @@ private void testSendingReminders(String projectName, List dataF assertEquals(2, privateDataCount); log("Changing reminder settings. Setting reminder frequency to 0."); - saveSettings("2", "12", "0"); + savePrivateDataReminderSettings("2", "12", "0"); // Do not select any experiments. Job will not run. log("Attempt to send reminders without selecting any experiments. Job should not run."); @@ -145,7 +145,7 @@ private void testSendingReminders(String projectName, List dataF verifyPipelineJobLogMessage(projectName, message, message2, "Skipped posting reminders for 2 experiments"); log("Changing reminder settings. Setting delay until first reminder to 0."); - saveSettings("2", "0", "0"); + savePrivateDataReminderSettings("2", "0", "0"); // Post reminders in test mode. log("Posting reminders in test mode. Select all experiment rows."); @@ -161,7 +161,7 @@ private void testSendingReminders(String projectName, List dataF // Change the reminder frequency to 1. log("Changing reminder settings. Setting reminder frequency to 1."); - saveSettings("2", "0", "1"); + savePrivateDataReminderSettings("2", "0", "1"); // Post reminders again. Since reminder frequency is set to 1, no reminders will be posted to the first data. postReminders(projectName, false, privateDataCount, -1, ++pipelineJobCount); verifyReminderPosted(projectName, privateData.get(0), 1); // No new reminders since reminder frequency is set to 1. 
@@ -171,7 +171,7 @@ private void testSendingReminders(String projectName, List dataF // Change reminder frequency to 0 again. log("Changing reminder settings. Setting reminder frequency to 0."); - saveSettings("2", "0", "0"); + savePrivateDataReminderSettings("2", "0", "0"); // Request extension for the first experiment. log("Requesting extension for experiment Id " + privateData.get(0).getExperimentAnnotationsId()); @@ -186,6 +186,7 @@ private void testSendingReminders(String projectName, List dataF // Request deletion for the second experiment. log("Requesting deletion for experiment Id " + privateData.get(1).getExperimentAnnotationsId()); requestDeletion(projectName, privateData.get(1)); + // Post reminders again - none should be posted postReminders(projectName, false, privateDataCount, -1, ++pipelineJobCount); verifyReminderPosted(projectName, privateData.get(0),1); // No new reminders since extension requested. @@ -291,24 +292,6 @@ private void verifyReminderPosted(String projectName, DataFolderInfo folderInfo, } } - private void saveSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency) - { - goToAdminConsole().goToSettingsSection(); - clickAndWait(Locator.linkWithText("Panorama Public")); - clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); - - setFormElement(Locator.input("delayUntilFirstReminder"), delayUntilFirstReminder); - setFormElement(Locator.input("reminderFrequency"), reminderFrequency); - setFormElement(Locator.input("extensionLength"), extensionLength); - clickButton("Save", 0); - waitForText("Private data reminder settings saved"); - clickAndWait(Locator.linkWithText("Back to Private Data Reminder Settings")); - - // clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); - assertEquals(String.valueOf(delayUntilFirstReminder), getFormElement(Locator.input("delayUntilFirstReminder"))); - assertEquals(String.valueOf(reminderFrequency), 
getFormElement(Locator.input("reminderFrequency"))); - assertEquals(String.valueOf(extensionLength), getFormElement(Locator.input("extensionLength"))); - } private void postRemindersNoExperimentsSelected(String projectName, int expectedExperimentCount) { @@ -373,15 +356,6 @@ private void postReminders(String projectName, boolean testMode, int expectedExp } } - private void goToSendRemindersPage(String projectName) - { - goToAdminConsole().goToSettingsSection(); - clickAndWait(Locator.linkWithText("Panorama Public")); - clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); - selectOptionByText(Locator.name("journal"), projectName); - clickAndWait(Locator.linkWithText("Send Reminders Now")); - waitForText(projectName, "A reminder message will be sent to the submitters of the selected experiments"); - } @Override protected void doCleanup(boolean afterTest) throws TestTimeoutException diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java new file mode 100644 index 00000000..0e1a77cd --- /dev/null +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java @@ -0,0 +1,242 @@ +package org.labkey.test.tests.panoramapublic; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.labkey.remoteapi.CommandException; +import org.labkey.remoteapi.CommandResponse; +import org.labkey.remoteapi.Connection; +import org.labkey.remoteapi.SimpleGetCommand; +import org.labkey.test.BaseWebDriverTest; +import org.labkey.test.Locator; +import org.labkey.test.TestProperties; +import org.labkey.test.TestTimeoutException; +import org.labkey.test.WebTestHelper; +import org.labkey.test.categories.External; +import org.labkey.test.categories.MacCossLabModules; +import org.labkey.test.util.ApiPermissionsHelper; +import org.labkey.test.util.DataRegionTable; + +import 
java.io.IOException; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.labkey.test.util.PermissionsHelper.READER_ROLE; + +@Category({External.class, MacCossLabModules.class}) +@BaseWebDriverTest.ClassTimeout(minutes = 7) +public class PublicationSearchTest extends PanoramaPublicBaseTest +{ + private static final String SKY_FILE = "MRMer.zip"; + + private static final String SUBMITTER_USER = "submitter@panoramapublic.test"; + private static final String ADMIN_USER = "admin@panoramapublic.test"; + + private static final String EXPERIMENT_TITLE = "Design, Implementation and Multisite Evaluation of a System Suitability Protocol" + + " for the Quantitative Assessment of Instrument Performance in Liquid Chromatography-Multiple Reaction Monitoring-MS (LC-MRM-MS)"; + private static final String PXD_ID = "PXD010535"; + private static final String PMID = "23689285"; + + private static final String SOURCE_FOLDER = "Pub Search Source"; + private static final String TARGET_FOLDER = "Pub Search Copy"; + + private boolean _useMockNcbi = false; + + /** + * Tests the publication search flow: + * > search for publications + * > notify submitter + * > submitter dismisses suggestion + * > verify publication not suggested again + * Uses mock NCBI service when running on TeamCity, and NCBI API is not reachable. + * The mock service returns saved data for PMID 23689285 (Abbatiello et al., Mol Cell Proteomics 2013). 
+ * The test dataset uses: + * - Submitter: Birgit Schilling (author #3 on the paper) + * - PXD: PXD010535 + * - Title matches the paper title + */ + @Test + public void testPublicationSearchAndDismiss() + { + String panoramaPublicProject = PANORAMA_PUBLIC; + goToProjectHome(panoramaPublicProject); + ApiPermissionsHelper permissionsHelper = new ApiPermissionsHelper(this); + permissionsHelper.setSiteGroupPermissions("Guests", READER_ROLE); + + // Step 1: Set up mock NCBI service if running on TeamCity + // On dev machine only mock if NCBI is not reachable. + setupMockNcbiService(); + + // Step 2: Create folder, submit to Panorama Public, and copy + String testProject = getProjectName(); + String shortAccessUrl = setupFolderSubmitAndCopy(testProject, SOURCE_FOLDER, TARGET_FOLDER, + EXPERIMENT_TITLE, SUBMITTER_USER, "Schilling", "Birgit", ADMIN_USER, SKY_FILE); + + // Navigate to the Panorama Public copy folder and get the experiment ID + goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); + goToExperimentDetailsPage(); + int exptId = Integer.parseInt(portalHelper.getUrlParam("id")); + + // Step 3: Assign PXD ID to the experiment via API + assignPxdId(panoramaPublicProject, TARGET_FOLDER, exptId, PXD_ID); + + // Step 4: Search publications for the dataset — should find PMID 23689285 + goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); + beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "searchPublicationsForDataset", Map.of("id", String.valueOf(exptId)))); + waitForText("Publications Matches for Dataset"); + + // Verify the publication was found + assertTextPresent(PMID); + // assertTextPresent("ProteomeXchange ID"); + assertTextPresent("Author, Title"); + + // Step 5: Select the publication and notify the submitter + click(Locator.radioButtonByNameAndValue("publicationId", PMID)); + clickButton("Notify Submitter"); + + // Step 6: Verify the reminder message was posted in the support thread + 
goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); + portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); + waitForText("Submitted - " + shortAccessUrl); + assertTextPresent("Action Required: Publication Found for Your Data on Panorama Public"); + assertTextPresent("We found a paper that appears to be associated with your private data on Panorama Public"); + assertTextPresent("Abbatiello SE, Mani DR, Schilling B"); // Verify the citation is included in the message + assertElementPresent(Locator.linkWithHref("https://pubmed.ncbi.nlm.nih.gov/" + PMID)); // Verify the PubMed link + + // Step 7: Impersonate submitter, dismiss the publication suggestion + goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); + portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); + impersonate(SUBMITTER_USER); + waitForText("Submitted - " + shortAccessUrl); + click(Locator.linkWithText("Dismiss Publication Suggestion")); + waitForText("Dismiss Publication Suggestion"); + clickButton("OK", 0); + waitForText("The publication suggestion has been dismissed"); + stopImpersonating(); + + // Verify the dismissal notification was posted in the support thread + goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); + portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); + waitForText("Submitted - " + shortAccessUrl); + assertTextPresentInThisOrder( + // "Abbatiello SE, Mani DR, Schilling B", // From previous message + "Publication Suggestion Dismissed", + "Thank you for letting us know that the suggested paper is not associated with your data on Panorama Public"); + // "Abbatiello SE, Mani DR, Schilling B"); // Citation should appear within the dismissal message, after the title and body + + // Step 8: Search publications for the dataset again and verify the dismissed publication is flagged + goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); + 
beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "searchPublicationsForDataset", Map.of("id", String.valueOf(exptId)))); + waitForText("Publications Matches for Dataset"); + assertTextPresent(PMID); + // The dismissed publication should show "Yes" in the User Dismissed column + assertTextPresent("User Dismissed"); + + // Find the row containing the PMID radio button and verify the last cell contains "Yes" + Locator dismissedCell = Locator.xpath("//tr[.//input[@value='" + PMID + "']]/td[last()]"); + assertEquals("Yes", getText(dismissedCell)); + + // Verify that we cannot notify the submitter for a dismissed publication + click(Locator.radioButtonByNameAndValue("publicationId", PMID)); + clickButton("Notify Submitter"); + assertTextPresent("has already dismissed the publication suggestion"); + + // Step 9: Run the post reminders pipeline job and verify the dismissed publication is skipped + // Set reminder settings: delayUntilFirstReminder=0, reminderFrequency=0, enablePublicationSearch=true + savePrivateDataReminderSettings("2", "0", "0", true); + + // Post reminders with publication search enabled + goToSendRemindersPage(panoramaPublicProject); + DataRegionTable table = new DataRegionTable("ExperimentAnnotations", getDriver()); + table.checkAllOnPage(); + checkCheckbox(Locator.checkboxByName("searchPublications")); + clickButton("Post Reminders", 0); + waitForPipelineJobsToComplete(1, "Post private data reminder messages", false); + + // Verify the pipeline job log contains the skip message for the dismissed publication + goToProjectHome(panoramaPublicProject); + goToDataPipeline(); + goToDataPipeline().clickStatusLink(0); + String skipMessage = String.format("User has dismissed publication suggestion for experiment %d; skipping search", exptId); + assertTextPresent(skipMessage); + } + + /** + * POST to SetupMockNcbiServiceAction. 
+ */ + private void setupMockNcbiService() + { + boolean mock = TestProperties.isTestRunningOnTeamCity(); + if (!mock) return; + boolean checkNcbiReachable = false; + + try + { + Connection connection = createDefaultConnection(); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "setupMockNcbiService"); + command.setParameters(Map.of("checkNcbiReachable", checkNcbiReachable)); + CommandResponse response = command.execute(connection, "/"); + Object mockValue = response.getProperty("mock"); + _useMockNcbi = Boolean.TRUE.equals(mockValue); + if (_useMockNcbi) + { + log("Using mock NCBI service" + (checkNcbiReachable ? " (NCBI not reachable)" : "")); + } + else + { + log("Using real NCBI service (NCBI is reachable)"); + } + } + catch (IOException | CommandException e) + { + fail("Failed to set up mock NCBI service: " + e.getMessage()); + } + } + + /** + * Calls the panoramapublic "restoreNcbiService" action (issued with a SimpleGetCommand) to restore the real NCBI service. A failure is logged as a warning and does not fail the test. + */ + private void restoreNcbiService() + { + try + { + Connection connection = createDefaultConnection(); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "restoreNcbiService"); + command.execute(connection, "/"); + log("Restored real NCBI service"); + } + catch (IOException | CommandException e) + { + log("Warning: Failed to restore NCBI service: " + e.getMessage()); + } + } + + /** + * Assign a PXD ID to an experiment using the UpdatePxDetails page. + * Cannot use UpdateRowsCommand because the ExperimentAnnotations table is not updatable via the HTTP-based APIs.
+ */ + private void assignPxdId(String projectName, String folderName, int exptId, String pxdId) + { + goToProjectFolder(projectName, folderName); + beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "updatePxDetails", Map.of("id", String.valueOf(exptId)))); + waitForText("Update ProteomeXchange Details"); + setFormElement(Locator.input("pxId"), pxdId); + clickButton("Update"); + log("Assigned PXD ID " + pxdId + " to experiment " + exptId); + } + + @Override + protected void doCleanup(boolean afterTest) throws TestTimeoutException + { + if (_useMockNcbi) + { + restoreNcbiService(); + } + + _userHelper.deleteUsers(false, SUBMITTER_USER, ADMIN_USER); + super.doCleanup(afterTest); + } +} From 54a8223450aaf76f531e0086dbb0fa40b6cfa613 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Tue, 3 Mar 2026 14:25:45 -0800 Subject: [PATCH 18/27] Adding classes missing from previous commit. --- .../MockNcbiPublicationSearchService.java | 96 ++ .../NcbiPublicationSearchServiceImpl.java | 1319 +++++++++++++++++ 2 files changed, 1415 insertions(+) create mode 100644 panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java create mode 100644 panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java new file mode 100644 index 00000000..1efdd3a4 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java @@ -0,0 +1,96 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.util.Pair; +import org.labkey.panoramapublic.model.ExperimentAnnotations; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; + +import 
java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Collections; +import java.util.Date; +import java.util.List; + +/** + * Mock implementation of {@link NcbiPublicationSearchService} that returns canned data for PMID 23689285 + * (Abbatiello et al., Mol Cell Proteomics 2013). Used by Selenium tests when NCBI is not reachable. + */ +public class MockNcbiPublicationSearchService implements NcbiPublicationSearchService +{ + private static final String PMID = "23689285"; + + private static final String CITATION = "Abbatiello SE, Mani DR, Schilling B, Maclean B, Zimmerman LJ, " + + "Feng X, Cusack MP, Sedransk N, Hall SC, Addona T, Allen S, Dodder NG, Ghosh M, Held JM, Hedrick V, " + + "Inerowicz HD, Jackson A, Keshishian H, Kim JW, Lyssand JS, Riley CP, Rudnick P, Sadowski P, " + + "Shaddox K, Smith D, Tomazela D, Wahlander A, Waldemarson S, Whitwell CA, You J, Zhang S, " + + "Kinsinger CR, Mesri M, Rodriguez H, Borchers CH, Buck C, Fisher SJ, Gibson BW, Liebler D, " + + "Maccoss M, Neubert TA, Paulovich A, Regnier F, Skates SJ, Tempst P, Wang M, Carr SA. " + + "Design, implementation and multisite evaluation of a system suitability protocol for the quantitative " + + "assessment of instrument performance in liquid chromatography-multiple reaction monitoring-MS (LC-MRM-MS). " + + "Mol Cell Proteomics. 2013 Sep;12(9):2623-39. doi: 10.1074/mcp.M112.027078. Epub 2013 May 20. 
" + + "PMID: 23689285; PMCID: PMC3769335."; + + private static final Date PUBLISHED_DATE; + static + { + try + { + PUBLISHED_DATE = new SimpleDateFormat("yyyy MMM").parse("2013 Sep"); + } + catch (ParseException e) + { + throw new RuntimeException(e); + } + } + + @Override + public @Nullable String getCitation(String publicationId, DB database) + { + if (PMID.equals(publicationId)) + { + return CITATION; + } + return null; + } + + @Override + public @Nullable Pair getPubMedLinkAndCitation(String pubmedId) + { + if (PMID.equals(pubmedId)) + { + return new Pair<>(NcbiConstants.getPubmedLink(pubmedId), CITATION); + } + return null; + } + + @Override + public @Nullable PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) + { + List matches = searchForPublication(expAnnotations, 1, logger, true); + return matches.isEmpty() ? null : matches.get(0); + } + + @Override + public List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations) + { + PublicationMatch match = new PublicationMatch( + PMID, + DB.PubMed, + false, // foundByPxId (simulates finding via PXD010535) + false, // foundByUrl + false, // foundByDoi + true, // authorMatch + true, // titleMatch + PUBLISHED_DATE + ); + + if (getCitations) + { + match.setCitation(CITATION); + } + + return Collections.singletonList(match); + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java new file mode 100644 index 00000000..dc3c61c5 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java @@ -0,0 +1,1319 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import 
org.apache.hc.client5.http.config.ConnectionConfig; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.client5.http.impl.io.BasicHttpClientConnectionManager; +import org.apache.hc.client5.http.HttpResponseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.util.Timeout; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.junit.Assert; +import org.junit.Test; +import org.labkey.api.util.Pair; +import org.labkey.api.util.StringUtilsLabKey; +import org.labkey.api.util.logging.LogHelper; +import org.labkey.panoramapublic.datacite.DataCiteService; +import org.labkey.panoramapublic.model.ExperimentAnnotations; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; + +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_DOI; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PANORAMA_URL; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PX_ID; + +/** + * Real implementation of {@link NcbiPublicationSearchService} that searches NCBI via HTTP. 
+ */ +public class NcbiPublicationSearchServiceImpl implements NcbiPublicationSearchService +{ + private static final Logger LOG = LogHelper.getLogger(NcbiPublicationSearchServiceImpl.class, "Search NCBI for publications associated with Panorama Public datasets"); + + // Static holder for the service instance + private static NcbiPublicationSearchService _instance = new NcbiPublicationSearchServiceImpl(); + + public static NcbiPublicationSearchService getInstance() + { + return _instance; + } + + public static void setInstance(NcbiPublicationSearchService impl) + { + _instance = impl; + } + + // NCBI API endpoints + private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; + private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; + private static final String EINFO_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"; + + // NCBI Literature Citation Exporter endpoints + private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; + private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=citation&id="; + + // API parameters + private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec + private static final int TIMEOUT_MS = 10000; // 10 seconds + private static final int REACHABILITY_TIMEOUT_MS = 5000; // 5 seconds + private static final String NCBI_EMAIL = "panorama@proteinms.net"; + + // Preprint indicators + private static final String[] PREPRINT_INDICATORS = { + "preprint", "biorxiv", "medrxiv", "chemrxiv", "arxiv", + "research square", "preprints.org" + }; + + // Title keyword extraction + private static final int MIN_KEYWORD_LENGTH = 3; + private static final double KEYWORD_MATCH_THRESHOLD = 0.6; // 60% of keywords must match + + // Stop words for title keyword matching. 
These words are not meaningful discriminators between papers. + // Function words: articles, prepositions, conjunctions, auxiliary verbs, pronouns. + // Title fillers: common words in paper titles that are not discriminating. + private static final Set TITLE_STOP_WORDS = Set.of( + + // Function words + "the", "this", "that", "these", "those", "its", // articles / determiners + "for", "with", "from", "into", "upon", "via", "after", "about", // prepositions + "between", "through", "across", "under", "over", "during", + "and", "but", "than", // conjunctions + "are", "was", "were", "been", "have", "has", "can", "may", // auxiliary / modal verbs + "our", "their", // pronouns + "not", "all", "also", "both", "each", "how", "use", "used", // other function words + "here", "well", "two", "one", "more", "most", "only", "such", "other", "which", + + // Title fillers + "using", "based", "reveals", "revealed", "show", "shows", "shown", + "role", "study", "studies" + ); + + private static Logger getLog(@Nullable Logger logger) + { + return logger != null ? logger : LOG; + } + + /** + * Checks whether the NCBI E-utilities API is reachable. + * Performs a HEAD request to the einfo endpoint with a 5-second timeout. 
+ * @return true if NCBI responds with HTTP 200, false otherwise + */ + public static boolean isNcbiReachable() + { + HttpURLConnection conn = null; + try + { + URL url = new URL(EINFO_URL); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("HEAD"); + conn.setConnectTimeout(REACHABILITY_TIMEOUT_MS); + conn.setReadTimeout(REACHABILITY_TIMEOUT_MS); + return conn.getResponseCode() == HttpURLConnection.HTTP_OK; + } + catch (IOException e) + { + LOG.info("NCBI is not reachable: " + e.getMessage()); + return false; + } + finally + { + if (conn != null) conn.disconnect(); + } + } + + @Override + public @Nullable String getCitation(String publicationId, DB database) + { + if (publicationId == null || !publicationId.matches(NcbiConstants.PUBMED_ID)) + { + return null; + } + + String baseUrl = database == DB.PMC ? PMC_CITATION_EXPORTER_URL : PUBMED_CITATION_EXPORTER_URL; + String queryUrl = baseUrl + publicationId; + + HttpURLConnection conn = null; + try + { + URL url = new URL(queryUrl); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + + int status = conn.getResponseCode(); + + if (status == HttpURLConnection.HTTP_OK) + { + String response; + try (InputStream in = conn.getInputStream()) + { + response = IOUtils.toString(in, StandardCharsets.UTF_8); + } + return parseCitation(response, publicationId, database); + } + } + catch (IOException e) + { + LOG.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); + } + finally + { + if (conn != null) conn.disconnect(); + } + return null; + } + + @Override + public Pair getPubMedLinkAndCitation(String pubmedId) + { + String citation = getCitation(pubmedId, DB.PubMed); + return citation != null ? 
new Pair<>(NcbiConstants.getPubmedLink(pubmedId), citation) : null; + } + + static String parseCitation(String response, String publicationId, DB database) + { + try + { + var jsonObject = new JSONObject(response); + var nlmInfo = jsonObject.optJSONObject("nlm"); + return null != nlmInfo ? nlmInfo.getString("orig") : null; + } + catch (JSONException e) + { + LOG.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); + } + return null; + } + + @Override + public PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) + { + List matches = searchForPublication(expAnnotations, 1, logger, true); + return matches.isEmpty() ? null : matches.get(0); + } + + @Override + public List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations) + { + Logger log = getLog(logger); + maxResults = Math.max(1, Math.min(maxResults, NcbiPublicationSearchService.MAX_RESULTS)); + log.info("Starting publication search for experiment: {}", expAnnotations.getId()); + + // Search PubMed Central first + List matchedArticles = searchPmc(expAnnotations, log); + + // If no PMC results, fall back to PubMed + if (matchedArticles.isEmpty()) + { + log.info("No PMC articles found, trying PubMed fallback"); + matchedArticles = searchPubMed(expAnnotations, log); + } + + // Build and return result + if (matchedArticles.isEmpty()) + { + log.info("No publications found"); + return Collections.emptyList(); + } + + if (matchedArticles.size() > maxResults) + { + matchedArticles = matchedArticles.subList(0, maxResults); + } + + if (getCitations) + { + // Fetch citations for each match + for (PublicationMatch match : matchedArticles) + { + rateLimit(); + match.setCitation(getCitation(match.getPublicationId(), match.getPublicationType())); + } + } + + log.info("Returning {}", 
StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); + return matchedArticles; + } + + /** + * Search PubMed Central + */ + private static @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) + { + // Track PMC IDs found by each strategy + Map> pmcIdsByStrategy = new HashMap<>(); + + // Step 1: Search PMC by PX ID + if (!StringUtils.isBlank(expAnnotations.getPxid())) + { + log.debug("Searching PMC by PX ID: {}", expAnnotations.getPxid()); + List ids = searchPmc(quote(expAnnotations.getPxid()), log); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put(MATCH_PX_ID, ids); + log.debug("Found {} PMC articles by PX ID", ids.size()); + } + rateLimit(); + } + + // Step 2: Search PMC by Panorama URL + if (expAnnotations.getShortUrl() != null) + { + String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); + log.debug("Searching PMC by Panorama URL: {}", panoramaUrl); + List ids = searchPmc(quote(panoramaUrl), log); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put(MATCH_PANORAMA_URL, ids); + log.debug("Found {} PMC articles by Panorama URL", ids.size()); + } + rateLimit(); + } + + // Step 3: Search PMC by DOI + if (!StringUtils.isBlank(expAnnotations.getDoi())) + { + String doiUrl = expAnnotations.getDoi().startsWith("http") + ? 
expAnnotations.getDoi() + : DataCiteService.toUrl(expAnnotations.getDoi()); + log.debug("Searching PMC by DOI: {}", doiUrl); + List ids = searchPmc(quote(doiUrl), log); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put(MATCH_DOI, ids); + log.debug("Found {} PMC articles by DOI", ids.size()); + } + rateLimit(); + } + + // Accumulate unique PMC IDs and track which strategies found each + Set uniquePmcIds = new HashSet<>(); + Map> idToStrategies = new HashMap<>(); + + for (Map.Entry> entry : pmcIdsByStrategy.entrySet()) + { + String strategy = entry.getKey(); + for (String id : entry.getValue()) + { + uniquePmcIds.add(id); + idToStrategies.computeIfAbsent(id, k -> new ArrayList<>()).add(strategy); + } + } + + log.info("Total unique PMC IDs found: {}", uniquePmcIds.size()); + + // Fetch and verify PMC articles + List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); + + // Apply priority filtering + return applyPriorityFiltering(pmcArticles, expAnnotations.getCreated(), log); + } + + /** + * Search PubMed Central with the given query + */ + private static List searchPmc(String query, Logger log) + { + return executeSearch(query, "pmc", log); + } + + /** + * Search PubMed with the given query + */ + private static List searchPubMed(String query, Logger log) + { + return executeSearch(query, "pubmed", log); + } + + /** + * Execute search using NCBI ESearch API + */ + private static List executeSearch(String query, String database, Logger log) + { + String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = ESEARCH_URL + + "?db=" + database + + "&term=" + encodedQuery + + "&retmax=" + NcbiPublicationSearchService.MAX_RESULTS + + "&retmode=json" + + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); + + try + { + JSONObject json = getJson(url); + JSONObject eSearchResult = json.getJSONObject("esearchresult"); + JSONArray idList = eSearchResult.getJSONArray("idlist"); + + List ids = new 
ArrayList<>(); + for (int i = 0; i < idList.length(); i++) + { + ids.add(idList.getString(i)); + } + return ids; + } + catch (IOException | JSONException e) + { + log.error("Error searching {} with query: {}", database, query, e); + return Collections.emptyList(); + } + } + + /** + * Execute an HTTP GET request and parse the response as JSON. + * @throws IOException if the request fails or the server returns a non-2xx response + * @throws JSONException if the response body is not valid JSON + */ + private static JSONObject getJson(String url) throws IOException + { + ConnectionConfig connectionConfig = ConnectionConfig.custom() + .setConnectTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .setSocketTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .build(); + + RequestConfig requestConfig = RequestConfig.custom() + .setResponseTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .build(); + + BasicHttpClientConnectionManager connectionManager = new BasicHttpClientConnectionManager(); + connectionManager.setConnectionConfig(connectionConfig); + + try (CloseableHttpClient client = HttpClientBuilder.create() + .setDefaultRequestConfig(requestConfig) + .setConnectionManager(connectionManager) + .build()) + { + HttpGet get = new HttpGet(url); + return client.execute(get, response -> { + int status = response.getCode(); + if (status < 200 || status >= 300) + { + throw new HttpResponseException(status, response.getReasonPhrase()); + } + String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + return new JSONObject(body); + }); + } + } + + /** + * Fetch metadata for PMC articles using ESummary API + */ + private static Map fetchPmcMetadata(Collection pmcIds, Logger log) + { + return fetchMetadata(pmcIds, "pmc", log); + } + + /** + * Fetch metadata for PubMed articles using ESummary API + */ + private static Map fetchPubMedMetadata(Collection pmids, Logger log) + { + return fetchMetadata(pmids, "pubmed", log); + } + + /** + * Fetch metadata using NCBI ESummary 
API (batch request) + */ + private static Map fetchMetadata(Collection ids, String database, Logger log) + { + if (ids.isEmpty()) return Collections.emptyMap(); + + String idString = String.join(",", ids); + String url = ESUMMARY_URL + + "?db=" + database + + "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + + "&retmode=json" + + "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); + + try + { + JSONObject json = getJson(url); + JSONObject result = json.optJSONObject("result"); + if (result == null) return Collections.emptyMap(); + + // Extract metadata for each ID + Map metadata = new HashMap<>(); + for (String id : ids) + { + JSONObject articleData = result.optJSONObject(id); + if (articleData != null) + { + metadata.put(id, articleData); + } + } + return metadata; + } + catch (IOException | JSONException e) + { + log.error("Error fetching {} metadata for IDs: {}", database, ids, e); + return Collections.emptyMap(); + } + } + + /** + * Fetch and verify PMC articles (filter preprints, check author/title matches) + */ + private static List fetchAndVerifyPmcArticles( + Set pmcIds, + Map> idToStrategies, + ExperimentAnnotations expAnnotations, + Logger log) + { + if (pmcIds.isEmpty()) return Collections.emptyList(); + + // Fetch all metadata in batch + Map metadata = fetchPmcMetadata(pmcIds, log); + + List articles = new ArrayList<>(); + String firstName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getFirstName() : null; + String lastName = expAnnotations.getSubmitterUser() != null + ? 
expAnnotations.getSubmitterUser().getLastName() : null; + + for (String pmcId : pmcIds) + { + JSONObject articleData = metadata.get(pmcId); + if (articleData == null) continue; + + // Filter out preprints + if (isPreprint(articleData)) + { + String source = articleData.optString("source", ""); + String journal = articleData.optString("fulljournalname", ""); + log.info("Excluded preprint PMC{}: source=\"{}\", journal=\"{}\"", pmcId, source, journal); + continue; + } + + // Check author and title matches + boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); + boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); + + // Extract PubMed ID, is found + String pubMedId = extractPubMedId(articleData); + String publicationId = pubMedId == null ? pmcId : pubMedId; + DB publicationType = pubMedId == null ? DB.PMC : DB.PubMed; + + // Get strategies that found this article + List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); + + // Map strategies to boolean flags + boolean foundByPxId = strategies.contains(MATCH_PX_ID); + boolean foundByUrl = strategies.contains(MATCH_PANORAMA_URL); + boolean foundByDoi = strategies.contains(MATCH_DOI); + + Date pubDate = parsePublicationDate(articleData, log); + + PublicationMatch article = new PublicationMatch( + publicationId, + publicationType, + foundByPxId, + foundByUrl, + foundByDoi, + authorMatch, + titleMatch, + pubDate + ); + articles.add(article); + + log.info("PMC{} -> {} {} | Match info: {}", pmcId, publicationType, publicationId, article.getMatchInfo()); + } + + return articles; + } + + /** + * Apply priority filtering to narrow down results + */ + static List applyPriorityFiltering(List articles, @NotNull Date referenceDate, Logger log) + { + if (articles.size() <= 1) return articles; + + // Priority 1: Filter to articles found by multiple data IDs (most reliable) + List multipleIds = articles.stream() + .filter(a -> countDataIdMatches(a) >= 2) + 
.collect(Collectors.toList()); + + if (!multipleIds.isEmpty()) + { + log.debug("Filtered to {} article(s) found by multiple IDs", multipleIds.size()); + articles = multipleIds; + } + + if (articles.size() <= 1) return sortByDateProximity(articles, referenceDate); + + // Priority 2: Filter to articles with both Author AND Title match + List bothMatches = articles.stream() + .filter(a -> a.matchesAuthor() && a.matchesTitle()) + .collect(Collectors.toList()); + + if (!bothMatches.isEmpty()) + { + log.debug("Filtered to {} article(s) with both Author and Title match", bothMatches.size()); + articles = bothMatches; + } + + return sortByDateProximity(articles, referenceDate); + } + + /** + * Sort articles by proximity of publication date to the reference date (experiment creation date). + * Articles with dates closest to the reference date come first. + * Articles without a publication date are sorted to the end. + */ + static List sortByDateProximity(List articles, @NotNull Date referenceDate) + { + if (articles.size() <= 1) + { + return articles; + } + long refTime = referenceDate.getTime(); + articles = new ArrayList<>(articles); + articles.sort(Comparator.comparingLong(a -> + a.getPublicationDate() != null ? Math.abs(a.getPublicationDate().getTime() - refTime) : Long.MAX_VALUE + )); + return articles; + } + + private static int countDataIdMatches(PublicationMatch a) + { + int count = 0; + if (a.matchesProteomeXchangeId()) count++; + if (a.matchesPanoramaUrl()) count++; + if (a.matchesDoi()) count++; + return count; + } + + /** + * Fall back to PubMed search if PMC finds nothing + */ + private static List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) + { + String firstName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getFirstName() : null; + String lastName = expAnnotations.getSubmitterUser() != null + ? 
expAnnotations.getSubmitterUser().getLastName() : null; + String title = expAnnotations.getTitle(); + + if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName) || StringUtils.isBlank(title)) + { + log.info("Cannot perform PubMed fallback - missing author or title information"); + return Collections.emptyList(); + } + + // Search PubMed: "LastName FirstName[Author] AND Title NOT preprint[Publication Type]" + String query = String.format("%s %s[Author] AND %s NOT preprint[Publication Type]", + lastName, firstName, title); + + log.debug("PubMed fallback query: {}", query); + List pmids = searchPubMed(query, log); + + if (pmids.isEmpty()) + { + log.info("PubMed fallback found no results"); + return Collections.emptyList(); + } + + log.info("PubMed fallback found {} result(s), verifying...", pmids.size()); + + // Fetch metadata and verify + Map metadata = fetchPubMedMetadata(pmids, log); + + List articles = new ArrayList<>(); + for (String pmid : pmids) + { + JSONObject articleData = metadata.get(pmid); + if (articleData == null) continue; + + // Filter out preprints + if (isPreprint(articleData)) + { + log.info("Excluded preprint PMID {}", pmid); + continue; + } + + // Verify both author AND title match + boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); + boolean titleMatch = checkTitleMatch(articleData, title); + + // Only accept if BOTH match + if (authorMatch && titleMatch) + { + Date pubDate = parsePublicationDate(articleData, log); + PublicationMatch article = new PublicationMatch( + pmid, + DB.PubMed, + false, // Not found by PX ID + false, // Not found by Panorama URL + false, // Not found by DOI + true, // Matched by author + true, // Matched by title + pubDate + ); + articles.add(article); + log.info("PMID {} verified with both Author and Title match", pmid); + } + } + + log.info("Verified {} of {} PubMed results", articles.size(), pmids.size()); + return articles; + } + + /** + * Check if article is a preprint + */ + 
static boolean isPreprint(JSONObject metadata) + { + String source = metadata.optString("source", "").toLowerCase(); + String journal = metadata.optString("fulljournalname", "").toLowerCase(); + + for (String indicator : PREPRINT_INDICATORS) + { + if (source.contains(indicator) || journal.contains(indicator)) + { + return true; + } + } + return false; + } + + /** + * Check if article matches the author (submitter) + */ + static boolean checkAuthorMatch(JSONObject metadata, String firstName, String lastName) + { + if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName)) + { + return false; + } + + JSONArray authors = metadata.optJSONArray("authors"); + if (authors == null || authors.isEmpty()) + { + return false; + } + + String firstInitial = firstName.substring(0, 1).toLowerCase(); + String lastNameLower = lastName.toLowerCase(); + + for (int i = 0; i < authors.length(); i++) + { + JSONObject author = authors.optJSONObject(i); + if (author == null) continue; + + String authorName = author.optString("name", "").toLowerCase(); + + // Match format: "LastName FirstInitial" or "LastName F" + if (authorName.startsWith(lastNameLower)) + { + String afterLastName = authorName.substring(lastNameLower.length()).trim(); + if (afterLastName.startsWith(firstInitial)) + { + return true; + } + } + } + + return false; + } + + /** + * Check if article title matches the dataset title + */ + static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) + { + if (StringUtils.isBlank(datasetTitle)) + { + return false; + } + + // Try title first, then sorttitle as fallback + String title = normalizeTitle(metadata.optString("title")); + String normalizedArticleTitle = title.isEmpty() + ? 
normalizeTitle(metadata.optString("sorttitle")) + : title; + + if (normalizedArticleTitle.isEmpty()) + { + return false; + } + + String normalizedDatasetTitle = normalizeTitle(datasetTitle); + + // Try exact match first + if (normalizedArticleTitle.equals(normalizedDatasetTitle)) + { + return true; + } + + // Bi-directional keyword matching: match if either direction meets the threshold. + // This handles cases where one title is much more specific than the other. + return keywordsMatch(datasetTitle, normalizedArticleTitle) + || keywordsMatch(metadata.optString("title", metadata.optString("sorttitle", "")), normalizedDatasetTitle); + } + + /** + * Check if keywords extracted from {@code sourceTitle} are present in {@code normalizedTarget}. + * Returns true if at least 60% of keywords match (with a minimum of 2). + */ + private static boolean keywordsMatch(String sourceTitle, String normalizedTarget) + { + List keywords = extractTitleKeywords(sourceTitle); + if (keywords.isEmpty()) + { + return false; + } + + long matchCount = keywords.stream() + .filter(keyword -> normalizedTarget.contains(keyword)) + .count(); + + int required = (int) Math.ceil(keywords.size() * KEYWORD_MATCH_THRESHOLD); + required = Math.max(required, Math.min(2, keywords.size())); + return matchCount >= required; + } + + /** + * Normalize title for comparison (lowercase, remove punctuation, normalize whitespace) + */ + static String normalizeTitle(String title) + { + if (title == null) return ""; + + return title.toLowerCase() + .replaceAll("[^\\w\\s]", "") // Remove punctuation + .replaceAll("\\s+", " ") // Normalize whitespace + .trim(); + } + + /** + * Extract meaningful keywords from title for matching. + * Returns all qualifying words (no cap) — the caller uses a percentage threshold. 
+ */ + static List extractTitleKeywords(String title) + { + if (StringUtils.isBlank(title)) + { + return Collections.emptyList(); + } + + String[] words = title.toLowerCase().split("\\s+"); + List keywords = new ArrayList<>(); + + for (String word : words) + { + // Remove non-alphanumeric characters + word = word.replaceAll("[^a-z0-9]", ""); + + // Check length and exclude stop words + if (word.length() >= MIN_KEYWORD_LENGTH && !TITLE_STOP_WORDS.contains(word)) + { + keywords.add(word); + } + } + + return keywords; + } + + /** + * Extract PMID from PMC metadata + */ + static @Nullable String extractPubMedId(JSONObject pmcMetadata) + { + // PMC articles contain PMID in articleids array + JSONArray articleIds = pmcMetadata.optJSONArray("articleids"); + if (articleIds != null) + { + for (int i = 0; i < articleIds.length(); i++) + { + JSONObject idObj = articleIds.optJSONObject(i); + if (idObj != null && "pmid".equals(idObj.optString("idtype"))) + { + return idObj.optString("value"); + } + } + } + + return null; + } + + /** + * Parse publication date from ESummary metadata. + * Tries "pubdate" first, then "epubdate". Dates are typically in "YYYY Mon DD" or "YYYY Mon" format. 
+ */ + static @Nullable Date parsePublicationDate(JSONObject metadata, Logger log) + { + String dateStr = metadata.optString("pubdate", ""); + if (StringUtils.isBlank(dateStr)) + { + dateStr = metadata.optString("epubdate", ""); + } + if (StringUtils.isBlank(dateStr)) + { + return null; + } + + // NCBI dates are typically "YYYY Mon DD", "YYYY Mon", or "YYYY" + String[] formats = {"yyyy MMM dd", "yyyy MMM", "yyyy"}; + for (String format : formats) + { + try + { + return new SimpleDateFormat(format).parse(dateStr); + } + catch (ParseException ignored) + { + } + } + log.debug("Unable to parse publication date: {}", dateStr); + return null; + } + + /** + * Rate limiting: wait 400ms between API requests + */ + private static void rateLimit() + { + try + { + Thread.sleep(RATE_LIMIT_DELAY_MS); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + } + } + + /** + * Wrap string in quotes for exact match search + */ + private static String quote(String str) + { + return "\"" + str + "\""; + } + + public static class TestCase extends Assert + { + // -- parseCitation tests -- + + @Test + public void testParseCitation() + { + // Valid NLM citation response + String json = "{\"nlm\":{\"orig\":\"Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. PMID: 23689285; PMCID: PMC3769335.\"}}"; + assertEquals("Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. 
PMID: 23689285; PMCID: PMC3769335.", parseCitation(json, "23689285", DB.PubMed)); + + // Missing nlm key + assertEquals(null, parseCitation("{\"ama\":{\"orig\":\"something\"}}", "23689285", DB.PubMed)); + + // Malformed JSON + assertEquals(null, parseCitation("not json", "23689285", DB.PubMed)); + + // Empty nlm object (missing "orig" key) + assertEquals(null, parseCitation("{\"nlm\":{}}", "23689285", DB.PubMed)); + } + + // -- isPreprint tests -- + + @Test + public void testIsPreprint() + { + // Non-preprint + assertFalse(isPreprint(articleMetadata("J Proteome Res", "Journal of Proteome Research"))); + + // Preprint by source + assertTrue(isPreprint(articleMetadata("bioRxiv", "bioRxiv"))); + assertTrue(isPreprint(articleMetadata("medRxiv", ""))); + + // Preprint by journal name + assertTrue(isPreprint(articleMetadata("", "Research Square"))); + assertTrue(isPreprint(articleMetadata("", "ChemRxiv preprint"))); + + // Case insensitive + assertTrue(isPreprint(articleMetadata("BIORXIV", ""))); + + // Empty fields + assertFalse(isPreprint(articleMetadata("", ""))); + } + + // -- checkAuthorMatch tests -- + + @Test + public void testCheckAuthorMatch() + { + // Standard match: "Sharma V" matches firstName=Vagisha, lastName=Sharma + JSONObject metadata = metadataWithAuthors("Sharma V", "Jones AB", "Smith CD"); + assertTrue(checkAuthorMatch(metadata, "Vagisha", "Sharma")); + + // Match is case-insensitive + assertTrue(checkAuthorMatch(metadataWithAuthors("sharma v"), "Vagisha", "Sharma")); + assertTrue(checkAuthorMatch(metadataWithAuthors("SHARMA V"), "vagisha", "sharma")); + + // No matching author + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones AB", "Smith CD"), "Vagisha", "Sharma")); + + // Blank first or last name + assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "", "Sharma")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "Vagisha", "")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), null, "Sharma")); + + // 
Empty authors array + assertFalse(checkAuthorMatch(new JSONObject(), "Vagisha", "Sharma")); + + // Author with full first name: "Sharma Vagisha" + assertTrue(checkAuthorMatch(metadataWithAuthors("Sharma Vagisha"), "Vagisha", "Sharma")); + } + + // -- checkTitleMatch tests -- + + @Test + public void testCheckTitleMatchExact() + { + // Exact match (case-insensitive, punctuation-stripped) + JSONObject metadata = metadataWithTitle("Quantitative Proteomics of Muscle Fibers"); + assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers")); + assertTrue(checkTitleMatch(metadata, "quantitative proteomics of muscle fibers")); + assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers!")); + } + + @Test + public void testCheckTitleMatchKeywords() + { + // Article: "quantitative proteomics reveals muscle fibers composition" + // Dataset: "muscle fibers composition proteomics quantitative patterns" + // Dataset keywords: "muscle", "fibers", "composition", "proteomics", "quantitative", "patterns" (6 keywords) + // Matches in article: "muscle", "fibers", "composition", "proteomics", "quantitative" (5 of 6 = 83%) -> pass + JSONObject metadata = metadataWithTitle("A study of quantitative proteomics reveals muscle fibers composition"); + assertTrue(checkTitleMatch(metadata, "muscle fibers composition proteomics quantitative patterns")); + + // Keywords not present in article title — well below 60% + assertFalse(checkTitleMatch(metadata, "Novel cardiac lipids quantitation in mouse tissue")); + } + + @Test + public void testCheckTitleMatchThreshold() + { + // Article contains: "phosphoproteomics", "analysis", "lung", "cancer", "cell", "lines" + JSONObject metadata = metadataWithTitle("Phosphoproteomics analysis of lung cancer cell lines"); + + // 4 of 5 keywords match (80%) -> pass (threshold is 60%) + assertTrue(checkTitleMatch(metadata, "phosphoproteomics lung cancer cell biomarkers")); + + // Only 2 of 5 keywords match (40%) -> fail + 
assertFalse(checkTitleMatch(metadata, "phosphoproteomics kidney heart liver cancer")); + } + + @Test + public void testCheckTitleMatchShortKeywords() + { + // Short but meaningful words (3-4 chars) should be extracted as keywords + JSONObject metadata = metadataWithTitle("DIA proteomics of lung cell iron metabolism in mice"); + assertTrue(checkTitleMatch(metadata, "DIA lung cell iron mice proteomics")); + } + + @Test + public void testCheckTitleMatchBidirectional() + { + // Forward direction fails: dataset is very specific, article is short. + // Dataset keywords: "comprehensive", "phosphoproteomics", "analysis", "novel", "biomarkers", "lung", "cancer", "cell", "lines" (9) + // Only "lung", "cancer" found in article (2 of 9 = 22%) -> forward fails + // + // Reverse direction passes: article keywords: "lung", "cancer", "proteomics" (3) + // All 3 found in dataset -> 100% -> reverse passes + JSONObject metadata = metadataWithTitle("Lung cancer proteomics"); + assertTrue(checkTitleMatch(metadata, + "Comprehensive phosphoproteomics analysis reveals novel biomarkers in lung cancer cell lines")); + + // Both directions fail — completely unrelated titles + assertFalse(checkTitleMatch(metadataWithTitle("Cardiac tissue lipidomics"), + "Hepatic transcriptomics in zebrafish embryos")); + } + + @Test + public void testCheckTitleMatchEdgeCases() + { + // Blank dataset title + assertFalse(checkTitleMatch(metadataWithTitle("Some article"), "")); + assertFalse(checkTitleMatch(metadataWithTitle("Some article"), null)); + + // Missing title in metadata + assertFalse(checkTitleMatch(new JSONObject(), "Some title")); + + // Dataset title with only stop words and short words below MIN_KEYWORD_LENGTH + // "a the and for with" -> all stop words or < 3 chars -> no keywords + assertFalse(checkTitleMatch(metadataWithTitle("Different title entirely"), "a the and for with")); + } + + @Test + public void testCheckTitleMatchSorttitleFallback() + { + // When "title" is missing, falls back to 
"sorttitle" + JSONObject metadata = new JSONObject(); + metadata.put("sorttitle", "phosphoproteomics of lung cancer"); + assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); + + // When "title" is present, it is used (not "sorttitle") + metadata = new JSONObject(); + metadata.put("title", "Phosphoproteomics of Lung Cancer"); + metadata.put("sorttitle", "completely different sort title"); + assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); + } + + // -- extractTitleKeywords tests -- + + @Test + public void testExtractTitleKeywords() + { + // Normal title — all non-stop words >= 3 chars are extracted + List keywords = extractTitleKeywords("Novel phosphoproteomics workflow for cardiac tissue samples"); + assertTrue(keywords.contains("novel")); + assertTrue(keywords.contains("phosphoproteomics")); + assertTrue(keywords.contains("workflow")); + assertTrue(keywords.contains("cardiac")); + assertTrue(keywords.contains("tissue")); + assertTrue(keywords.contains("samples")); + assertFalse("'for' is too short", keywords.contains("for")); + + // Domain-specific words are NOT stop words — they should be kept + keywords = extractTitleKeywords("Quantitative proteomics characterization identification analysis"); + assertTrue("'proteomics' is domain-specific, not a stop word", keywords.contains("proteomics")); + assertTrue("'characterization' is domain-specific", keywords.contains("characterization")); + assertTrue("'identification' is domain-specific", keywords.contains("identification")); + assertTrue("'analysis' is domain-specific", keywords.contains("analysis")); + assertTrue(keywords.contains("quantitative")); + + // Function words and title fillers are excluded + keywords = extractTitleKeywords("the study using based reveals role"); + assertFalse("'the' is a function word", keywords.contains("the")); + assertFalse("'study' is a title filler", keywords.contains("study")); + assertFalse("'using' is a title filler", 
keywords.contains("using")); + assertFalse("'based' is a title filler", keywords.contains("based")); + assertFalse("'reveals' is a title filler", keywords.contains("reveals")); + assertFalse("'role' is a title filler", keywords.contains("role")); + + // Short meaningful words (>= 3 chars) are kept + keywords = extractTitleKeywords("DIA analysis of lung cell iron metabolism in mice"); + assertTrue("'dia' (3 chars) should be kept", keywords.contains("dia")); + assertTrue("'lung' (4 chars) should be kept", keywords.contains("lung")); + assertTrue("'cell' (4 chars) should be kept", keywords.contains("cell")); + assertTrue("'iron' (4 chars) should be kept", keywords.contains("iron")); + assertTrue("'mice' (4 chars) should be kept", keywords.contains("mice")); + assertFalse("'of' (2 chars) is too short", keywords.contains("of")); + assertFalse("'in' (2 chars) is too short", keywords.contains("in")); + + // No cap on number of keywords + keywords = extractTitleKeywords("alpha bravo charlie delta foxtrot hotel india juliet kilo lima"); + assertEquals(10, keywords.size()); + + // Blank input + assertTrue(extractTitleKeywords("").isEmpty()); + assertTrue(extractTitleKeywords(null).isEmpty()); + } + + // -- normalizeTitle tests -- + + @Test + public void testNormalizeTitle() + { + assertEquals("hello world", normalizeTitle("Hello, World!")); + assertEquals("testdriven development", normalizeTitle("Test-Driven Development")); + assertEquals("multiple spaces become one", normalizeTitle(" Multiple spaces become one ")); + assertEquals("", normalizeTitle(null)); + assertEquals("", normalizeTitle("")); + } + + // -- extractPubMedId tests -- + + @Test + public void testExtractPubMedId() + { + // PMC metadata with PMID + JSONObject metadata = new JSONObject(); + JSONArray articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); + articleIds.put(new JSONObject().put("idtype", "pmid").put("value", "28691345")); + 
metadata.put("articleids", articleIds); + assertEquals("28691345", extractPubMedId(metadata)); + + // No PMID in articleids + metadata = new JSONObject(); + articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); + metadata.put("articleids", articleIds); + assertNull(extractPubMedId(metadata)); + + // No articleids key + assertNull(extractPubMedId(new JSONObject())); + } + + // -- parsePublicationDate tests -- + + @Test + public void testParsePublicationDate() + { + // "YYYY Mon DD" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan 15"), LOG)); + + // "YYYY Mon" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan"), LOG)); + + // "YYYY" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024"), LOG)); + + // Falls back to epubdate when pubdate is blank + assertNotNull(parsePublicationDate(metadataWithDate("epubdate", "2024 Mar 01"), LOG)); + + // Unparseable date + assertNull(parsePublicationDate(metadataWithDate("pubdate", "not-a-date"), LOG)); + + // Empty date fields + assertNull(parsePublicationDate(new JSONObject(), LOG)); + } + + // -- applyPriorityFiltering tests -- + + @Test + public void testApplyPriorityFiltering() + { + Date refDate = new Date(); + + // Single article returned as-is + PublicationMatch single = createMatch("111", true, false, false, false, false, refDate); + List result = applyPriorityFiltering(List.of(single), refDate, LOG); + assertEquals(1, result.size()); + + // Articles found by multiple data IDs preferred over single-ID matches + PublicationMatch multiId = createMatch("222", true, true, false, true, true, refDate); + PublicationMatch singleId = createMatch("333", true, false, false, true, true, refDate); + result = applyPriorityFiltering(List.of(singleId, multiId), refDate, LOG); + assertEquals(1, result.size()); + assertEquals("222", result.get(0).getPublicationId()); + + // Among single-ID 
matches, author+title both matching preferred + PublicationMatch bothMatch = createMatch("444", true, false, false, true, true, refDate); + PublicationMatch authorOnly = createMatch("555", true, false, false, true, false, refDate); + result = applyPriorityFiltering(List.of(authorOnly, bothMatch), refDate, LOG); + assertEquals(1, result.size()); + assertEquals("444", result.get(0).getPublicationId()); + } + + @Test + public void testSortByDateProximity() + { + Date refDate = new Date(); + Date closer = new Date(refDate.getTime() - 86400000L); // 1 day before + Date farther = new Date(refDate.getTime() - 86400000L * 365); // 1 year before + + PublicationMatch farMatch = createMatch("111", true, false, false, false, false, farther); + PublicationMatch closeMatch = createMatch("222", true, false, false, false, false, closer); + PublicationMatch noDate = createMatch("333", true, false, false, false, false, null); + + List result = sortByDateProximity(List.of(farMatch, noDate, closeMatch), refDate); + assertEquals("222", result.get(0).getPublicationId()); // closest + assertEquals("111", result.get(1).getPublicationId()); // farther + assertEquals("333", result.get(2).getPublicationId()); // no date last + } + + // -- PublicationMatch round-trip tests -- + + @Test + public void testPublicationMatchRoundTrip() + { + PublicationMatch original = new PublicationMatch("12345", DB.PubMed, true, true, false, true, false, null); + assertEquals("ProteomeXchange ID, Panorama URL, Author", original.getMatchInfo()); + + PublicationMatch restored = PublicationMatch.fromMatchInfo("12345", DB.PubMed, original.getMatchInfo()); + assertTrue(restored.matchesProteomeXchangeId()); + assertTrue(restored.matchesPanoramaUrl()); + assertFalse(restored.matchesDoi()); + assertTrue(restored.matchesAuthor()); + assertFalse(restored.matchesTitle()); + + // All flags + original = new PublicationMatch("67890", DB.PMC, true, true, true, true, true, null); + assertEquals("ProteomeXchange ID, Panorama 
URL, DOI, Author, Title", original.getMatchInfo()); + restored = PublicationMatch.fromMatchInfo("67890", DB.PMC, original.getMatchInfo()); + assertTrue(restored.matchesProteomeXchangeId()); + assertTrue(restored.matchesPanoramaUrl()); + assertTrue(restored.matchesDoi()); + assertTrue(restored.matchesAuthor()); + assertTrue(restored.matchesTitle()); + + // Empty match info + restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, ""); + assertFalse(restored.matchesProteomeXchangeId()); + assertFalse(restored.matchesAuthor()); + + // Null match info + restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, null); + assertFalse(restored.matchesProteomeXchangeId()); + } + + // -- Helper methods for building test JSON -- + + private static JSONObject articleMetadata(String source, String fullJournalName) + { + JSONObject metadata = new JSONObject(); + metadata.put("source", source); + metadata.put("fulljournalname", fullJournalName); + return metadata; + } + + private static JSONObject metadataWithAuthors(String... 
authorNames) + { + JSONObject metadata = new JSONObject(); + JSONArray authors = new JSONArray(); + for (String name : authorNames) + { + authors.put(new JSONObject().put("name", name)); + } + metadata.put("authors", authors); + return metadata; + } + + private static JSONObject metadataWithTitle(String title) + { + JSONObject metadata = new JSONObject(); + metadata.put("title", title); + return metadata; + } + + private static JSONObject metadataWithDate(String field, String dateStr) + { + JSONObject metadata = new JSONObject(); + metadata.put(field, dateStr); + return metadata; + } + + private static PublicationMatch createMatch(String id, boolean pxId, boolean url, boolean doi, + boolean author, boolean title, @Nullable Date pubDate) + { + return new PublicationMatch(id, DB.PubMed, pxId, url, doi, author, title, pubDate); + } + } +} From e2b3637a9e93ceec3789d3d632dde94ece81c731 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Tue, 3 Mar 2026 17:56:41 -0800 Subject: [PATCH 19/27] Override getJson() and getCitation() in MockNcbiPublicationSearchService --- .../MockNcbiPublicationSearchService.java | 128 ++++++++++++------ .../NcbiPublicationSearchServiceImpl.java | 20 +-- .../panoramapublic/PublicationSearchTest.java | 2 + 3 files changed, 97 insertions(+), 53 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java index 1efdd3a4..2e44fbfa 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java @@ -1,25 +1,25 @@ package org.labkey.panoramapublic.ncbi; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.labkey.api.util.Pair; -import org.labkey.panoramapublic.model.ExperimentAnnotations; +import 
org.json.JSONArray; +import org.json.JSONObject; import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Collections; -import java.util.Date; -import java.util.List; +import java.io.IOException; /** * Mock implementation of {@link NcbiPublicationSearchService} that returns canned data for PMID 23689285 * (Abbatiello et al., Mol Cell Proteomics 2013). Used by Selenium tests when NCBI is not reachable. + * + * Extends {@link NcbiPublicationSearchServiceImpl} and only overrides the two methods that make HTTP calls + * to NCBI: {@link #getJson(String)} (used by ESearch/ESummary) and {@link #getCitation(String, DB)} + * (used by the Citation Exporter API). All search logic, filtering, author/title verification, and + * priority filtering run through the real implementation code. */ -public class MockNcbiPublicationSearchService implements NcbiPublicationSearchService +public class MockNcbiPublicationSearchService extends NcbiPublicationSearchServiceImpl { private static final String PMID = "23689285"; + private static final String PMC_ID = "3769335"; private static final String CITATION = "Abbatiello SE, Mani DR, Schilling B, Maclean B, Zimmerman LJ, " + "Feng X, Cusack MP, Sedransk N, Hall SC, Addona T, Allen S, Dodder NG, Ghosh M, Held JM, Hedrick V, " + @@ -32,19 +32,37 @@ public class MockNcbiPublicationSearchService implements NcbiPublicationSearchSe "Mol Cell Proteomics. 2013 Sep;12(9):2623-39. doi: 10.1074/mcp.M112.027078. Epub 2013 May 20. " + "PMID: 23689285; PMCID: PMC3769335."; - private static final Date PUBLISHED_DATE; - static + private static final String ARTICLE_TITLE = "Design, implementation and multisite evaluation of a system suitability " + + "protocol for the quantitative assessment of instrument performance in liquid chromatography-multiple " + + "reaction monitoring-MS (LC-MRM-MS)."; + + /** + * Returns canned JSON for NCBI ESearch and ESummary API requests. 
+     * <ul>
+     *   <li>ESearch for PMC with a query containing "PXD010535" returns PMC ID 3769335</li>
+     *   <li>ESummary for PMC ID 3769335 returns article metadata (authors, title, pubdate, PMID)</li>
+     *   <li>All other requests return empty results</li>
+     * </ul>
+ * This allows the real search logic in {@link NcbiPublicationSearchServiceImpl} to run against mock data. + */ + @Override + protected JSONObject getJson(String url) throws IOException { - try + if (url.contains("esearch.fcgi")) { - PUBLISHED_DATE = new SimpleDateFormat("yyyy MMM").parse("2013 Sep"); + return handleESearch(url); } - catch (ParseException e) + else if (url.contains("esummary.fcgi")) { - throw new RuntimeException(e); + return handleESummary(url); } + throw new IOException("MockNcbiPublicationSearchService: unexpected URL: " + url); } + /** + * Returns the canned NLM citation for PMID 23689285. Returns null for any other publication ID. + * Overrides the real implementation which makes an HTTP call to the NCBI Citation Exporter API. + */ @Override public @Nullable String getCitation(String publicationId, DB database) { @@ -55,42 +73,66 @@ public class MockNcbiPublicationSearchService implements NcbiPublicationSearchSe return null; } - @Override - public @Nullable Pair getPubMedLinkAndCitation(String pubmedId) + /** + * Handle mock ESearch requests. Returns PMC ID 3769335 when query contains "PXD010535", + * empty results otherwise. + */ + private JSONObject handleESearch(String url) { - if (PMID.equals(pubmedId)) + JSONObject result = new JSONObject(); + JSONObject esearchResult = new JSONObject(); + + if (url.contains("PXD010535")) { - return new Pair<>(NcbiConstants.getPubmedLink(pubmedId), CITATION); + esearchResult.put("idlist", new JSONArray().put(PMC_ID)); + } + else + { + esearchResult.put("idlist", new JSONArray()); } - return null; - } - @Override - public @Nullable PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) - { - List matches = searchForPublication(expAnnotations, 1, logger, true); - return matches.isEmpty() ? 
null : matches.get(0); + result.put("esearchresult", esearchResult); + return result; } - @Override - public List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations) + /** + * Handle mock ESummary requests. Returns article metadata for PMC ID 3769335. + */ + private JSONObject handleESummary(String url) { - PublicationMatch match = new PublicationMatch( - PMID, - DB.PubMed, - false, // foundByPxId (simulates finding via PXD010535) - false, // foundByUrl - false, // foundByDoi - true, // authorMatch - true, // titleMatch - PUBLISHED_DATE - ); - - if (getCitations) + JSONObject response = new JSONObject(); + JSONObject result = new JSONObject(); + + if (url.contains(PMC_ID)) { - match.setCitation(CITATION); + JSONObject articleData = new JSONObject(); + articleData.put("title", ARTICLE_TITLE); + articleData.put("sorttitle", ARTICLE_TITLE.toLowerCase()); + articleData.put("source", "Mol Cell Proteomics"); + articleData.put("fulljournalname", "Molecular & cellular proteomics : MCP"); + articleData.put("pubdate", "2013 Sep"); + articleData.put("epubdate", "2013 May 20"); + + // Authors — include first 3 plus a few more for realism + JSONArray authors = new JSONArray(); + authors.put(new JSONObject().put("name", "Abbatiello SE")); + authors.put(new JSONObject().put("name", "Mani DR")); + authors.put(new JSONObject().put("name", "Schilling B")); + authors.put(new JSONObject().put("name", "Maclean B")); + authors.put(new JSONObject().put("name", "Zimmerman LJ")); + authors.put(new JSONObject().put("name", "Carr SA")); + articleData.put("authors", authors); + + // Article IDs — PMC ID and PubMed ID + JSONArray articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC" + PMC_ID)); + articleIds.put(new JSONObject().put("idtype", "pmid").put("value", PMID)); + articleData.put("articleids", articleIds); + + result.put(PMC_ID, articleData); } - return 
Collections.singletonList(match); + response.put("result", result); + return response; } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java index dc3c61c5..85f0f76f 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java @@ -264,7 +264,7 @@ public List searchForPublication(@NotNull ExperimentAnnotation /** * Search PubMed Central */ - private static @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) + private @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) { // Track PMC IDs found by each strategy Map> pmcIdsByStrategy = new HashMap<>(); @@ -338,7 +338,7 @@ public List searchForPublication(@NotNull ExperimentAnnotation /** * Search PubMed Central with the given query */ - private static List searchPmc(String query, Logger log) + private List searchPmc(String query, Logger log) { return executeSearch(query, "pmc", log); } @@ -346,7 +346,7 @@ private static List searchPmc(String query, Logger log) /** * Search PubMed with the given query */ - private static List searchPubMed(String query, Logger log) + private List searchPubMed(String query, Logger log) { return executeSearch(query, "pubmed", log); } @@ -354,7 +354,7 @@ private static List searchPubMed(String query, Logger log) /** * Execute search using NCBI ESearch API */ - private static List executeSearch(String query, String database, Logger log) + private List executeSearch(String query, String database, Logger log) { String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); String url = ESEARCH_URL + @@ -389,7 +389,7 @@ private static List executeSearch(String query, String database, Logger * @throws IOException if the request fails or the server returns 
a non-2xx response * @throws JSONException if the response body is not valid JSON */ - private static JSONObject getJson(String url) throws IOException + protected JSONObject getJson(String url) throws IOException { ConnectionConfig connectionConfig = ConnectionConfig.custom() .setConnectTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) @@ -424,7 +424,7 @@ private static JSONObject getJson(String url) throws IOException /** * Fetch metadata for PMC articles using ESummary API */ - private static Map fetchPmcMetadata(Collection pmcIds, Logger log) + private Map fetchPmcMetadata(Collection pmcIds, Logger log) { return fetchMetadata(pmcIds, "pmc", log); } @@ -432,7 +432,7 @@ private static Map fetchPmcMetadata(Collection pmcId /** * Fetch metadata for PubMed articles using ESummary API */ - private static Map fetchPubMedMetadata(Collection pmids, Logger log) + private Map fetchPubMedMetadata(Collection pmids, Logger log) { return fetchMetadata(pmids, "pubmed", log); } @@ -440,7 +440,7 @@ private static Map fetchPubMedMetadata(Collection pm /** * Fetch metadata using NCBI ESummary API (batch request) */ - private static Map fetchMetadata(Collection ids, String database, Logger log) + private Map fetchMetadata(Collection ids, String database, Logger log) { if (ids.isEmpty()) return Collections.emptyMap(); @@ -479,7 +479,7 @@ private static Map fetchMetadata(Collection ids, Str /** * Fetch and verify PMC articles (filter preprints, check author/title matches) */ - private static List fetchAndVerifyPmcArticles( + private List fetchAndVerifyPmcArticles( Set pmcIds, Map> idToStrategies, ExperimentAnnotations expAnnotations, @@ -612,7 +612,7 @@ private static int countDataIdMatches(PublicationMatch a) /** * Fall back to PubMed search if PMC finds nothing */ - private static List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) + private List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) { String firstName = expAnnotations.getSubmitterUser() != null ? 
expAnnotations.getSubmitterUser().getFirstName() : null; diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java index 0e1a77cd..e1f46e71 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java @@ -236,6 +236,8 @@ protected void doCleanup(boolean afterTest) throws TestTimeoutException restoreNcbiService(); } + // TODO: Reset the reminder settings to not search for publications. + _userHelper.deleteUsers(false, SUBMITTER_USER, ADMIN_USER); super.doCleanup(afterTest); } From 49925e07cd0da1bc989a01c58b13405802b398ad Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Wed, 4 Mar 2026 12:58:41 -0800 Subject: [PATCH 20/27] - Moved check for isNcbiReachable to PublicationSearchTest.java - Added API action class to register mock publication data with the MockNcbiPublicationSearchService - Updated PublicationSearchTest - test one more dataset --- .../PanoramaPublicController.java | 110 ++++-- .../PanoramaPublicNotification.java | 6 +- .../panoramapublic/model/DatasetStatus.java | 9 +- .../MockNcbiPublicationSearchService.java | 197 ++++++---- .../NcbiPublicationSearchServiceImpl.java | 34 +- .../panoramapublic/ncbi/PublicationMatch.java | 4 +- .../view/searchPublicationsForm.jsp | 2 +- .../PanoramaPublicBaseTest.java | 20 + .../panoramapublic/PublicationSearchTest.java | 362 ++++++++++++------ 9 files changed, 478 insertions(+), 266 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 464cfbc4..1a8da271 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ 
-183,7 +183,6 @@ import org.labkey.panoramapublic.proteomexchange.ChemElement; import org.labkey.panoramapublic.proteomexchange.ExperimentModificationGetter; import org.labkey.panoramapublic.proteomexchange.Formula; -import org.labkey.panoramapublic.ncbi.NcbiConstants; import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import org.labkey.panoramapublic.proteomexchange.NcbiUtils; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; @@ -10458,7 +10457,7 @@ public abstract static class UpdateDatasetStatusAction extends ConfirmAction + public static class SetupMockNcbiServiceAction extends ReadOnlyApiAction { @Override - public Object execute(SetupMockForm form, BindException errors) + public Object execute(Object form, BindException errors) { - if (form.isCheckNcbiReachable() && NcbiPublicationSearchServiceImpl.isNcbiReachable()) - { - return new ApiSimpleResponse("mock", false); - } NcbiPublicationSearchServiceImpl.setInstance(new MockNcbiPublicationSearchService()); return new ApiSimpleResponse("mock", true); } } - public static class SetupMockForm + @RequiresSiteAdmin + public static class RestoreNcbiServiceAction extends ReadOnlyApiAction { - private boolean _checkNcbiReachable; - - public boolean isCheckNcbiReachable() - { - return _checkNcbiReachable; - } - - public void setCheckNcbiReachable(boolean checkNcbiReachable) + @Override + public Object execute(Object form, BindException errors) { - _checkNcbiReachable = checkNcbiReachable; + NcbiPublicationSearchServiceImpl.setInstance(new NcbiPublicationSearchServiceImpl()); + return new ApiSimpleResponse("restored", true); } } @RequiresSiteAdmin - public static class RestoreNcbiServiceAction extends ReadOnlyApiAction + public static class RegisterMockPublicationAction extends ReadOnlyApiAction { @Override - public Object execute(Object form, BindException errors) + public Object execute(RegisterMockPublicationForm form, BindException errors) { - 
NcbiPublicationSearchServiceImpl.setInstance(new NcbiPublicationSearchServiceImpl()); - return new ApiSimpleResponse("restored", true); + NcbiPublicationSearchService service = NcbiPublicationSearchServiceImpl.getInstance(); + if (!(service instanceof MockNcbiPublicationSearchService mock)) + { + errors.reject(ERROR_MSG, "Mock NCBI service is not available. Call setupMockNcbiService first."); + return null; + } + mock.register(form.getDatabase(), form.getId(), form.getSearchKey(), + form.getPmid(), form.getTitle(), form.getAuthors(), + form.getPubDate(), form.getSource(), form.getJournalFull(), + form.getCitation()); + return new ApiSimpleResponse("registered", true); } } + public static class RegisterMockPublicationForm + { + private String _database; + private String _id; + private String _searchKey; + private String _pmid; + private String _title; + private String _authors; + private String _pubDate; + private String _source; + private String _journalFull; + private String _citation; + + public String getDatabase() { return _database; } + public void setDatabase(String database) { _database = database; } + + public String getId() { return _id; } + public void setId(String id) { _id = id; } + + public String getSearchKey() { return _searchKey; } + public void setSearchKey(String searchKey) { _searchKey = searchKey; } + + public String getPmid() { return _pmid; } + public void setPmid(String pmid) { _pmid = pmid; } + + public String getTitle() { return _title; } + public void setTitle(String title) { _title = title; } + + public String getAuthors() { return _authors; } + public void setAuthors(String authors) { _authors = authors; } + + public String getPubDate() { return _pubDate; } + public void setPubDate(String pubDate) { _pubDate = pubDate; } + + public String getSource() { return _source; } + public void setSource(String source) { _source = source; } + + public String getJournalFull() { return _journalFull; } + public void setJournalFull(String journalFull) 
{ _journalFull = journalFull; } + + public String getCitation() { return _citation; } + public void setCitation(String citation) { _citation = citation; } + } + public static class TestCase extends AbstractActionPermissionTest { @Override diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index e38a28e6..7db3b4cf 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -374,7 +374,7 @@ public static void postPublicationDismissalMessage(@NotNull Journal journal, @No messageBody.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); messageBody.append("Thank you for letting us know that the suggested paper is not associated with your data on Panorama Public."); - messageBody.append(NL2).append(bold("Dismissed Publication:")).append(" ").append(publicationMatch.getPublicationLabel()); + messageBody.append(NL2).append(bold("Dismissed Publication:")).append(" ").append(publicationMatch.getPublicationIdLabel()); if (!StringUtils.isBlank(publicationMatch.getCitation())) { messageBody.append(NL).append(publicationMatch.getCitation()); @@ -437,12 +437,12 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations if (articleMatch != null) { // Message variant when a publication was found - message.append("We found a paper that appears to be associated with your private data on Panorama Public.") + message.append("We found a paper that appears to be associated with your private data on Panorama Public (").append(shortUrl).append(").") .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) .append(NL2).append(bold("Publication Found:")).append(" ") .append(articleMatch.getCitation() != null ? 
link(articleMatch.getCitation(), articleMatch.getPublicationUrl()) - : link(articleMatch.getPublicationLabel(), articleMatch.getPublicationUrl())) + : link(articleMatch.getPublicationIdLabel(), articleMatch.getPublicationUrl())) .append(NL2).append("If this is indeed your paper, congratulations! We encourage you to make your data public so the research community can access it alongside your paper. ") .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") .append(bold(link("Make Data Public", makePublicLink))).append(".") diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 9d042d22..00be1a53 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -98,14 +98,11 @@ public void setPublicationType(String publicationType) _publicationType = publicationType; } - public String getPublicationLabel() + public String getPublicationIdLabel() { NcbiConstants.DB type = NcbiConstants.DB.fromString(_publicationType); - if (type == null) - { - return _publicationType != null ? _publicationType : ""; - } - return type.getLabel(); + String label = type == null ? (_publicationType != null ? 
_publicationType : "") : type.getLabel(); + return label + " ID " + getPotentialPublicationId(); } public String getPublicationMatchInfo() diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java index 2e44fbfa..29d82a23 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java @@ -6,44 +6,107 @@ import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** - * Mock implementation of {@link NcbiPublicationSearchService} that returns canned data for PMID 23689285 - * (Abbatiello et al., Mol Cell Proteomics 2013). Used by Selenium tests when NCBI is not reachable. - * + * Mock implementation of {@link NcbiPublicationSearchService} that returns canned data registered by tests. + * Used by Selenium tests when running on TeamCity. * Extends {@link NcbiPublicationSearchServiceImpl} and only overrides the two methods that make HTTP calls * to NCBI: {@link #getJson(String)} (used by ESearch/ESummary) and {@link #getCitation(String, DB)} - * (used by the Citation Exporter API). All search logic, filtering, author/title verification, and + * (used for the Citation Exporter API). All search logic, filtering, author/title verification, and * priority filtering run through the real implementation code. + * Tests register mock articles via {@link #register}, providing the database, ID, search key, + * metadata fields, and citation. The mock builds internal lookup maps from this data and returns + * appropriate JSON responses when the real search logic calls {@code getJson()} or {@code getCitation()}. 
*/ public class MockNcbiPublicationSearchService extends NcbiPublicationSearchServiceImpl { - private static final String PMID = "23689285"; - private static final String PMC_ID = "3769335"; - - private static final String CITATION = "Abbatiello SE, Mani DR, Schilling B, Maclean B, Zimmerman LJ, " + - "Feng X, Cusack MP, Sedransk N, Hall SC, Addona T, Allen S, Dodder NG, Ghosh M, Held JM, Hedrick V, " + - "Inerowicz HD, Jackson A, Keshishian H, Kim JW, Lyssand JS, Riley CP, Rudnick P, Sadowski P, " + - "Shaddox K, Smith D, Tomazela D, Wahlander A, Waldemarson S, Whitwell CA, You J, Zhang S, " + - "Kinsinger CR, Mesri M, Rodriguez H, Borchers CH, Buck C, Fisher SJ, Gibson BW, Liebler D, " + - "Maccoss M, Neubert TA, Paulovich A, Regnier F, Skates SJ, Tempst P, Wang M, Carr SA. " + - "Design, implementation and multisite evaluation of a system suitability protocol for the quantitative " + - "assessment of instrument performance in liquid chromatography-multiple reaction monitoring-MS (LC-MRM-MS). " + - "Mol Cell Proteomics. 2013 Sep;12(9):2623-39. doi: 10.1074/mcp.M112.027078. Epub 2013 May 20. " + - "PMID: 23689285; PMCID: PMC3769335."; - - private static final String ARTICLE_TITLE = "Design, implementation and multisite evaluation of a system suitability " + - "protocol for the quantitative assessment of instrument performance in liquid chromatography-multiple " + - "reaction monitoring-MS (LC-MRM-MS)."; + // ESearch: searchKey -> list of IDs (per database) + private final Map> _pmcSearchResults = new HashMap<>(); + private final Map> _pubmedSearchResults = new HashMap<>(); + + // ESummary: ID -> metadata JSONObject (per database) + private final Map _pmcMetadata = new HashMap<>(); + private final Map _pubmedMetadata = new HashMap<>(); + + // Citations: PMID -> citation string + private final Map _citations = new HashMap<>(); + + /** + * Register a mock article. 
The mock stores the data in internal lookup maps used by + * {@link #getJson(String)} and {@link #getCitation(String, DB)}. + * @param database "pmc" or "pubmed" — the NCBI database this article is in + * @param id the article ID in the given database (numeric ID for pmc or pubmed) + * @param searchKey what ESearch query term finds this article (e.g. PXD ID for PMC, author last name for PubMed) + * @param pmid linked PubMed ID for PMC articles. PMC articles usually have a corresponding PMID in their + * metadata (articleids array), which the real code extracts via {@code extractPubMedId()}. + * Null for pubmed articles (where {@code id} is already the PMID). + * @param title article title + * @param authors comma-separated author list (e.g. "Abbatiello SE,Mani DR,Schilling B") + * @param pubDate publication date string (e.g. "2013 Sep") + * @param source journal abbreviation used as ESummary "source" field (e.g. "Mol Cell Proteomics", "bioRxiv") + * @param journalFull full journal name used as ESummary "fulljournalname" field + * @param citation NLM citation string (for getCitation, keyed by PMID; null if not applicable) + */ + public void register(String database, String id, String searchKey, + @Nullable String pmid, String title, String authors, + String pubDate, String source, String journalFull, + @Nullable String citation) + { + boolean isPmc = "pmc".equalsIgnoreCase(database); + + // Build ESummary metadata + JSONObject metadata = new JSONObject(); + metadata.put("title", title); + metadata.put("sorttitle", title.toLowerCase()); + metadata.put("source", source); + metadata.put("fulljournalname", journalFull); + metadata.put("pubdate", pubDate); + + // Authors array + JSONArray authorsArray = new JSONArray(); + for (String author : authors.split(",")) + { + authorsArray.put(new JSONObject().put("name", author.trim())); + } + metadata.put("authors", authorsArray); + + // For PMC articles, add articleids with PMC ID and optional PMID + if (isPmc) + { + JSONArray 
articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC" + id)); + if (pmid != null) + { + articleIds.put(new JSONObject().put("idtype", "pmid").put("value", pmid)); + } + metadata.put("articleids", articleIds); + } + + // Store in appropriate maps + if (isPmc) + { + _pmcSearchResults.computeIfAbsent(searchKey, k -> new ArrayList<>()).add(id); + _pmcMetadata.put(id, metadata); + } + else + { + _pubmedSearchResults.computeIfAbsent(searchKey, k -> new ArrayList<>()).add(id); + _pubmedMetadata.put(id, metadata); + } + + // Store citation keyed by PMID + if (citation != null && pmid != null) + { + _citations.put(pmid, citation); + } + } /** - * Returns canned JSON for NCBI ESearch and ESummary API requests. - *
    - *
  • ESearch for PMC with a query containing "PXD010535" returns PMC ID 3769335
  • - *
  • ESummary for PMC ID 3769335 returns article metadata (authors, title, pubdate, PMID)
  • - *
  • All other requests return empty results
  • - *
- * This allows the real search logic in {@link NcbiPublicationSearchServiceImpl} to run against mock data. + * Returns canned JSON for NCBI ESearch and ESummary API requests based on registered mock data. */ @Override protected JSONObject getJson(String url) throws IOException @@ -60,79 +123,47 @@ else if (url.contains("esummary.fcgi")) } /** - * Returns the canned NLM citation for PMID 23689285. Returns null for any other publication ID. - * Overrides the real implementation which makes an HTTP call to the NCBI Citation Exporter API. + * Returns the registered citation for the given PubMed ID, or null if not registered. */ @Override - public @Nullable String getCitation(String publicationId, DB database) + public @Nullable String getCitation(String pubMedId, DB database) { - if (PMID.equals(publicationId)) - { - return CITATION; - } - return null; + return _citations.get(pubMedId); } - /** - * Handle mock ESearch requests. Returns PMC ID 3769335 when query contains "PXD010535", - * empty results otherwise. - */ private JSONObject handleESearch(String url) { - JSONObject result = new JSONObject(); - JSONObject esearchResult = new JSONObject(); + boolean isPmc = url.contains("db=pmc"); + Map> searchMap = isPmc ? _pmcSearchResults : _pubmedSearchResults; - if (url.contains("PXD010535")) + JSONArray idList = new JSONArray(); + for (Map.Entry> entry : searchMap.entrySet()) { - esearchResult.put("idlist", new JSONArray().put(PMC_ID)); - } - else - { - esearchResult.put("idlist", new JSONArray()); + if (url.contains(entry.getKey())) + { + entry.getValue().forEach(idList::put); + } } - result.put("esearchresult", esearchResult); - return result; + JSONObject esearchResult = new JSONObject(); + esearchResult.put("idlist", idList); + return new JSONObject().put("esearchresult", esearchResult); } - /** - * Handle mock ESummary requests. Returns article metadata for PMC ID 3769335. 
- */ private JSONObject handleESummary(String url) { - JSONObject response = new JSONObject(); - JSONObject result = new JSONObject(); + boolean isPmc = url.contains("db=pmc"); + Map metadataMap = isPmc ? _pmcMetadata : _pubmedMetadata; - if (url.contains(PMC_ID)) + JSONObject result = new JSONObject(); + for (Map.Entry entry : metadataMap.entrySet()) { - JSONObject articleData = new JSONObject(); - articleData.put("title", ARTICLE_TITLE); - articleData.put("sorttitle", ARTICLE_TITLE.toLowerCase()); - articleData.put("source", "Mol Cell Proteomics"); - articleData.put("fulljournalname", "Molecular & cellular proteomics : MCP"); - articleData.put("pubdate", "2013 Sep"); - articleData.put("epubdate", "2013 May 20"); - - // Authors — include first 3 plus a few more for realism - JSONArray authors = new JSONArray(); - authors.put(new JSONObject().put("name", "Abbatiello SE")); - authors.put(new JSONObject().put("name", "Mani DR")); - authors.put(new JSONObject().put("name", "Schilling B")); - authors.put(new JSONObject().put("name", "Maclean B")); - authors.put(new JSONObject().put("name", "Zimmerman LJ")); - authors.put(new JSONObject().put("name", "Carr SA")); - articleData.put("authors", authors); - - // Article IDs — PMC ID and PubMed ID - JSONArray articleIds = new JSONArray(); - articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC" + PMC_ID)); - articleIds.put(new JSONObject().put("idtype", "pmid").put("value", PMID)); - articleData.put("articleids", articleIds); - - result.put(PMC_ID, articleData); + if (url.contains(entry.getKey())) + { + result.put(entry.getKey(), entry.getValue()); + } } - response.put("result", result); - return response; + return new JSONObject().put("result", result); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java index 85f0f76f..7240b74c 100644 --- 
a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java @@ -73,7 +73,7 @@ public static void setInstance(NcbiPublicationSearchService impl) // NCBI API endpoints private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; - private static final String EINFO_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"; + // NCBI Literature Citation Exporter endpoints private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; @@ -82,7 +82,7 @@ public static void setInstance(NcbiPublicationSearchService impl) // API parameters private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec private static final int TIMEOUT_MS = 10000; // 10 seconds - private static final int REACHABILITY_TIMEOUT_MS = 5000; // 5 seconds + private static final String NCBI_EMAIL = "panorama@proteinms.net"; // Preprint indicators @@ -120,34 +120,6 @@ private static Logger getLog(@Nullable Logger logger) return logger != null ? logger : LOG; } - /** - * Checks whether the NCBI E-utilities API is reachable. - * Performs a HEAD request to the einfo endpoint with a 5-second timeout. 
- * @return true if NCBI responds with HTTP 200, false otherwise - */ - public static boolean isNcbiReachable() - { - HttpURLConnection conn = null; - try - { - URL url = new URL(EINFO_URL); - conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("HEAD"); - conn.setConnectTimeout(REACHABILITY_TIMEOUT_MS); - conn.setReadTimeout(REACHABILITY_TIMEOUT_MS); - return conn.getResponseCode() == HttpURLConnection.HTTP_OK; - } - catch (IOException e) - { - LOG.info("NCBI is not reachable: " + e.getMessage()); - return false; - } - finally - { - if (conn != null) conn.disconnect(); - } - } - @Override public @Nullable String getCitation(String publicationId, DB database) { @@ -165,6 +137,7 @@ public static boolean isNcbiReachable() URL url = new URL(queryUrl); conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); + conn.setRequestProperty("User-Agent", "PanoramaPublic/1.0"); int status = conn.getResponseCode(); @@ -409,6 +382,7 @@ protected JSONObject getJson(String url) throws IOException .build()) { HttpGet get = new HttpGet(url); + get.setHeader("User-Agent", "PanoramaPublic/1.0"); return client.execute(get, response -> { int status = response.getCode(); if (status < 200 || status >= 300) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index 52fb9bc0..4ba8e9ac 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -123,7 +123,7 @@ public String getPublicationUrl() return NcbiConstants.getPubmedLink(_publicationId); } - public String getPublicationLabel() + public String getPublicationIdLabel() { return _publicationType.getLabel() + " ID " + _publicationId; } @@ -133,7 +133,7 @@ public JSONObject toJson() JSONObject json = new JSONObject(); json.put("publicationId", _publicationId); json.put("publicationType", 
_publicationType.name()); - json.put("publicationLabel", getPublicationLabel()); + json.put("publicationIdLabel", getPublicationIdLabel()); json.put("publicationUrl", getPublicationUrl()); json.put("matchInfo", getMatchInfo()); if (_citation != null) diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp index 540d8918..bd6ca576 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp @@ -138,7 +138,7 @@ + ' href="' + LABKEY.Utils.encodeHtml(match.publicationUrl) + '"' + ' target="_blank"' + ' rel="noopener noreferrer">' - + LABKEY.Utils.encodeHtml(match.publicationLabel) + + LABKEY.Utils.encodeHtml(match.publicationIdLabel) + ''; matchHtml += '
' diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java index 28b5b520..a8a2efae 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java @@ -37,7 +37,9 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -615,6 +617,24 @@ protected void verifyIsPublicColumn(String panoramaPublicProject, String experim assertEquals(isPublic ? "Yes" : "No", expListTable.getDataAsText(0, "Public")); } + /** + * Navigate to the Private Data Reminder Settings page and read the current form values. + * Returns a map with keys: extensionLength, delayUntilFirstReminder, reminderFrequency, enablePublicationSearch. 
+ */ + protected Map getPrivateDataReminderSettings() + { + goToAdminConsole().goToSettingsSection(); + clickAndWait(Locator.linkWithText("Panorama Public")); + clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); + + Map settings = new HashMap<>(); + settings.put("extensionLength", getFormElement(Locator.input("extensionLength"))); + settings.put("delayUntilFirstReminder", getFormElement(Locator.input("delayUntilFirstReminder"))); + settings.put("reminderFrequency", getFormElement(Locator.input("reminderFrequency"))); + settings.put("enablePublicationSearch", String.valueOf(Locator.checkboxByName("enablePublicationSearch").findElement(getDriver()).isSelected())); + return settings; + } + protected void savePrivateDataReminderSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency) { savePrivateDataReminderSettings(extensionLength, delayUntilFirstReminder, reminderFrequency, false); diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java index e1f46e71..1ab08683 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java @@ -3,7 +3,6 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.labkey.remoteapi.CommandException; -import org.labkey.remoteapi.CommandResponse; import org.labkey.remoteapi.Connection; import org.labkey.remoteapi.SimpleGetCommand; import org.labkey.test.BaseWebDriverTest; @@ -17,43 +16,70 @@ import org.labkey.test.util.DataRegionTable; import java.io.IOException; +import java.util.HashMap; import java.util.Map; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static 
org.labkey.test.util.PermissionsHelper.READER_ROLE; @Category({External.class, MacCossLabModules.class}) -@BaseWebDriverTest.ClassTimeout(minutes = 7) +@BaseWebDriverTest.ClassTimeout(minutes = 10) public class PublicationSearchTest extends PanoramaPublicBaseTest { private static final String SKY_FILE = "MRMer.zip"; - private static final String SUBMITTER_USER = "submitter@panoramapublic.test"; + private static final String SUBMITTER_1 = "submitter1@panoramapublic.test"; + private static final String SUBMITTER_2 = "submitter2@panoramapublic.test"; private static final String ADMIN_USER = "admin@panoramapublic.test"; - private static final String EXPERIMENT_TITLE = "Design, Implementation and Multisite Evaluation of a System Suitability Protocol" + - " for the Quantitative Assessment of Instrument Performance in Liquid Chromatography-Multiple Reaction Monitoring-MS (LC-MRM-MS)"; - private static final String PXD_ID = "PXD010535"; - private static final String PMID = "23689285"; - - private static final String SOURCE_FOLDER = "Pub Search Source"; - private static final String TARGET_FOLDER = "Pub Search Copy"; + // --- Dataset 1: Abbatiello et al. (Mol Cell Proteomics 2013) --- + // Found via PubMed fallback (author + title match). Paper does not reference PXD010535 in PMC. 
+ private static final String FOLDER_1 = "System Suitability Study"; + private static final String TARGET_FOLDER_1 = "System Suitability Copy"; + private static final String TITLE_1 = "Design, implementation and multisite evaluation of a system suitability protocol " + + "for the quantitative assessment of instrument performance in liquid chromatography-multiple reaction monitoring-MS (LC-MRM-MS)"; + private static final String PXD_1 = "PXD010535"; + private static final String PMID_1 = "23689285"; + private static final String ARTICLE_TITLE_1 = TITLE_1; // Same as experiment title + private static final String CITATION_1 = "Abbatiello SE, Mani DR, Schilling B, Maclean B, Zimmerman LJ, " + + "Feng X, Cusack MP, Sedransk N, Hall SC, Addona T, Allen S, Dodder NG, Ghosh M, Held JM, Hedrick V, " + + "Inerowicz HD, Jackson A, Keshishian H, Kim JW, Lyssand JS, Riley CP, Rudnick P, Sadowski P, " + + "Shaddox K, Smith D, Tomazela D, Wahlander A, Waldemarson S, Whitwell CA, You J, Zhang S, " + + "Kinsinger CR, Mesri M, Rodriguez H, Borchers CH, Buck C, Fisher SJ, Gibson BW, Liebler D, " + + "Maccoss M, Neubert TA, Paulovich A, Regnier F, Skates SJ, Tempst P, Wang M, Carr SA. " + + "Design, implementation and multisite evaluation of a system suitability protocol for the quantitative " + + "assessment of instrument performance in liquid chromatography-multiple reaction monitoring-MS (LC-MRM-MS). " + + "Mol Cell Proteomics. 2013 Sep;12(9):2623-39. doi: 10.1074/mcp.M112.027078. Epub 2013 May 20. " + + "PMID: 23689285; PMCID: PMC3769335."; + + // --- Dataset 2: Wen et al. (Nat Commun 2025) --- + // Found via PMC search by PXD. PMC returns two results: Nature article and bioRxiv preprint. + // The preprint is filtered out by isPreprint(). 
+ private static final String FOLDER_2 = "Carafe"; + private static final String TARGET_FOLDER_2 = "Carafe Copy"; + private static final String TITLE_2 = "Carafe: a tool for in silico spectral library generation for DIA proteomics"; + private static final String PXD_2 = "PXD056793"; + private static final String PMID_2 = "41198693"; + private static final String PMC_ID_2 = "12592563"; + private static final String PMC_ID_2_PREPRINT = "11507862"; + private static final String ARTICLE_TITLE_2 = "Carafe enables high quality in silico spectral library generation " + + "for data-independent acquisition proteomics."; // Not identical to the experiment title but keywords match + private static final String CITATION_2 = "Wen B, Hsu C, Shteynberg D, Zeng WF, Riffle M, Chang A, Mudge MC, " + + "Nunn BL, MacLean BX, Berg MD, Villén J, MacCoss MJ, Noble WS. " + + "Carafe enables high quality in silico spectral library generation for data-independent acquisition proteomics. " + + "Nat Commun. 2025 Nov 6;16(1):9815. doi: 10.1038/s41467-025-64928-4. " + + "PMID: 41198693; PMCID: PMC12592563."; private boolean _useMockNcbi = false; + private Map _originalReminderSettings = null; /** - * Tests the publication search flow: - * > search for publications - * > notify submitter - * > submitter dismisses suggestion - * > verify publication not suggested again - * Uses mock NCBI service when running on TeamCity, and NCBI API is not reachable. - * The mock service returns saved data for PMID 23689285 (Abbatiello et al., Mol Cell Proteomics 2013). - * The test dataset uses: - * - Submitter: Birgit Schilling (author #3 on the paper) - * - PXD: PXD010535 - * - Title matches the paper title + * Tests the publication search flow with two datasets: + * Dataset 1 (Abbatiello et al.): search → notify → dismiss → verify not suggested again + * Dataset 2 (Wen et al.): verify publication found via pipeline job reminders + * Uses mock NCBI service when running on TeamCity. 
*/ @Test public void testPublicationSearchAndDismiss() @@ -64,130 +90,173 @@ public void testPublicationSearchAndDismiss() permissionsHelper.setSiteGroupPermissions("Guests", READER_ROLE); // Step 1: Set up mock NCBI service if running on TeamCity - // On dev machine only mock if NCBI is not reachable. setupMockNcbiService(); - // Step 2: Create folder, submit to Panorama Public, and copy + // Step 2: Create dataset 1 folder, submit to Panorama Public, and copy String testProject = getProjectName(); - String shortAccessUrl = setupFolderSubmitAndCopy(testProject, SOURCE_FOLDER, TARGET_FOLDER, - EXPERIMENT_TITLE, SUBMITTER_USER, "Schilling", "Birgit", ADMIN_USER, SKY_FILE); - - // Navigate to the Panorama Public copy folder and get the experiment ID - goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); - goToExperimentDetailsPage(); - int exptId = Integer.parseInt(portalHelper.getUrlParam("id")); - - // Step 3: Assign PXD ID to the experiment via API - assignPxdId(panoramaPublicProject, TARGET_FOLDER, exptId, PXD_ID); - - // Step 4: Search publications for the dataset — should find PMID 23689285 - goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); - beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), - "searchPublicationsForDataset", Map.of("id", String.valueOf(exptId)))); - waitForText("Publications Matches for Dataset"); - - // Verify the publication was found - assertTextPresent(PMID); - // assertTextPresent("ProteomeXchange ID"); + String shortAccessUrl1 = setupFolderSubmitAndCopy(testProject, FOLDER_1, TARGET_FOLDER_1, + TITLE_1, SUBMITTER_1, "Schilling", "Birgit", ADMIN_USER, SKY_FILE); + int exptId1 = getExperimentId(panoramaPublicProject, TARGET_FOLDER_1); + assignPxdId(panoramaPublicProject, TARGET_FOLDER_1, exptId1, PXD_1); + + // Step 3: Create dataset 2 folder, submit to Panorama Public, and copy + String shortAccessUrl2 = setupFolderSubmitAndCopy(testProject, FOLDER_2, TARGET_FOLDER_2, + TITLE_2, SUBMITTER_2, "Wen", 
"Bo", ADMIN_USER, SKY_FILE); + int exptId2 = getExperimentId(panoramaPublicProject, TARGET_FOLDER_2); + assignPxdId(panoramaPublicProject, TARGET_FOLDER_2, exptId2, PXD_2); + + // Step 4: Search publications for dataset 1 — should find PMID 23689285 via PubMed fallback + searchPublicationsForDataset(panoramaPublicProject, TARGET_FOLDER_1, exptId1); + assertTextPresent(PMID_1); assertTextPresent("Author, Title"); + assertTextNotPresent("ProteomeXchange ID"); // Paper does not contain the PXD // Step 5: Select the publication and notify the submitter - click(Locator.radioButtonByNameAndValue("publicationId", PMID)); + click(Locator.radioButtonByNameAndValue("publicationId", PMID_1)); clickButton("Notify Submitter"); // Step 6: Verify the reminder message was posted in the support thread - goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); - portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); - waitForText("Submitted - " + shortAccessUrl); + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + waitForText("Submitted - " + shortAccessUrl1); assertTextPresent("Action Required: Publication Found for Your Data on Panorama Public"); assertTextPresent("We found a paper that appears to be associated with your private data on Panorama Public"); - assertTextPresent("Abbatiello SE, Mani DR, Schilling B"); // Verify the citation is included in the message - assertElementPresent(Locator.linkWithHref("https://pubmed.ncbi.nlm.nih.gov/" + PMID)); // Verify the PubMed link + assertTextPresent("Abbatiello SE, Mani DR, Schilling B"); + assertElementPresent(Locator.linkWithHref("https://pubmed.ncbi.nlm.nih.gov/" + PMID_1)); // Step 7: Impersonate submitter, dismiss the publication suggestion - goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); - portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); - impersonate(SUBMITTER_USER); - waitForText("Submitted - " + shortAccessUrl); + 
goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + impersonate(SUBMITTER_1); + waitForText("Submitted - " + shortAccessUrl1); click(Locator.linkWithText("Dismiss Publication Suggestion")); - waitForText("Dismiss Publication Suggestion"); + waitForText("You are dismissing the publication suggestion for your data on Panorama Public"); + assertTextPresent(shortAccessUrl1); clickButton("OK", 0); waitForText("The publication suggestion has been dismissed"); stopImpersonating(); // Verify the dismissal notification was posted in the support thread - goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); - portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); - waitForText("Submitted - " + shortAccessUrl); + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + waitForText("Submitted - " + shortAccessUrl1); assertTextPresentInThisOrder( - // "Abbatiello SE, Mani DR, Schilling B", // From previous message "Publication Suggestion Dismissed", "Thank you for letting us know that the suggested paper is not associated with your data on Panorama Public"); - // "Abbatiello SE, Mani DR, Schilling B"); // Citation should appear within the dismissal message, after the title and body - // Step 8: Search publications for the dataset again and verify the dismissed publication is flagged - goToProjectFolder(panoramaPublicProject, TARGET_FOLDER); - beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), - "searchPublicationsForDataset", Map.of("id", String.valueOf(exptId)))); - waitForText("Publications Matches for Dataset"); - assertTextPresent(PMID); - // The dismissed publication should show "Yes" in the User Dismissed column + // Step 8: Search publications for dataset 1 again and verify the dismissed publication is flagged + searchPublicationsForDataset(panoramaPublicProject, TARGET_FOLDER_1, exptId1); + assertTextPresent(PMID_1); assertTextPresent("User Dismissed"); - // Find the row containing the PMID 
radio button and verify the last cell contains "Yes" - Locator dismissedCell = Locator.xpath("//tr[.//input[@value='" + PMID + "']]/td[last()]"); + // Verify the dismissed cell contains "Yes" + Locator dismissedCell = Locator.xpath("//tr[.//input[@value='" + PMID_1 + "']]/td[last()]"); assertEquals("Yes", getText(dismissedCell)); // Verify that we cannot notify the submitter for a dismissed publication - click(Locator.radioButtonByNameAndValue("publicationId", PMID)); + click(Locator.radioButtonByNameAndValue("publicationId", PMID_1)); clickButton("Notify Submitter"); - assertTextPresent("has already dismissed the publication suggestion"); + assertTextPresent("The user has already dismissed the publication suggestion PubMed ID " + PMID_1 + " for this dataset"); - // Step 9: Run the post reminders pipeline job and verify the dismissed publication is skipped - // Set reminder settings: delayUntilFirstReminder=0, reminderFrequency=0, enablePublicationSearch=true + // Step 9: Run the post reminders pipeline job with publication search enabled + // Save current settings so they can be restored in doCleanup + _originalReminderSettings = getPrivateDataReminderSettings(); savePrivateDataReminderSettings("2", "0", "0", true); // Post reminders with publication search enabled goToSendRemindersPage(panoramaPublicProject); DataRegionTable table = new DataRegionTable("ExperimentAnnotations", getDriver()); table.checkAllOnPage(); - checkCheckbox(Locator.checkboxByName("searchPublications")); + assertChecked(Locator.checkboxByName("searchPublications")); clickButton("Post Reminders", 0); waitForPipelineJobsToComplete(1, "Post private data reminder messages", false); - // Verify the pipeline job log contains the skip message for the dismissed publication + // Verify the pipeline job log contains the skip message for the dismissed dataset 1 publication goToProjectHome(panoramaPublicProject); goToDataPipeline(); goToDataPipeline().clickStatusLink(0); - String skipMessage = 
String.format("User has dismissed publication suggestion for experiment %d; skipping search", exptId); + String skipMessage = String.format("User has dismissed publication suggestion for experiment %d; skipping search", exptId1); assertTextPresent(skipMessage); + + // Step 10: Verify that the pipeline job found a publication for dataset 2 and posted a message + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_2); + waitForText("Submitted - " + shortAccessUrl2); + assertTextPresent("Action Required: Publication Found for Your Data on Panorama Public"); + assertTextPresent("We found a paper that appears to be associated with your private data on Panorama Public"); + assertTextPresent("Wen B, Hsu C, Shteynberg D"); + assertElementPresent(Locator.linkWithHref("https://pubmed.ncbi.nlm.nih.gov/" + PMID_2)); } /** - * POST to SetupMockNcbiServiceAction. + * Navigate to the Panorama Public copy folder and get the experiment ID. + */ + private int getExperimentId(String panoramaPublicProject, String targetFolder) + { + goToProjectFolder(panoramaPublicProject, targetFolder); + goToExperimentDetailsPage(); + return Integer.parseInt(portalHelper.getUrlParam("id")); + } + + /** + * Assign a PXD ID to an experiment using the UpdatePxDetails page. + * Cannot use UpdateRowsCommand because the ExperimentAnnotations table is not updatable via the HTTP-based APIs. + */ + private void assignPxdId(String projectName, String folderName, int exptId, String pxdId) + { + goToProjectFolder(projectName, folderName); + beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "updatePxDetails", Map.of("id", String.valueOf(exptId)))); + waitForText("Update ProteomeXchange Details"); + setFormElement(Locator.input("pxId"), pxdId); + clickButton("Update"); + log("Assigned PXD ID " + pxdId + " to experiment " + exptId); + } + + /** + * Navigate to the search publications page for a dataset. 
+ */ + private void searchPublicationsForDataset(String panoramaPublicProject, String targetFolder, int exptId) + { + goToProjectFolder(panoramaPublicProject, targetFolder); + beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "searchPublicationsForDataset", Map.of("id", String.valueOf(exptId)))); + waitForText("Publications Matches for Dataset"); + } + + /** + * Navigate to the support messages for a dataset. + */ + private void goToSupportMessages(String panoramaPublicProject, String targetFolder) + { + goToProjectFolder(panoramaPublicProject, targetFolder); + portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); + } + + /** + * On TeamCity: set up the mock NCBI service and register mock publication data. + * On dev machine: verify that NCBI is reachable; fail with a clear message if not. */ private void setupMockNcbiService() { - boolean mock = TestProperties.isTestRunningOnTeamCity(); - if (!mock) return; - boolean checkNcbiReachable = false; + boolean useMockNcbiService = TestProperties.isTestRunningOnTeamCity(); + if (useMockNcbiService) + { + installMockNcbiService(); + } + else + { + assertTrue("NCBI E-utilities API is not reachable. Check your network connection.", isNcbiReachable()); + log("Using real NCBI service"); + } + } + private void installMockNcbiService() + { try { Connection connection = createDefaultConnection(); SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "setupMockNcbiService"); - command.setParameters(Map.of("checkNcbiReachable", checkNcbiReachable)); - CommandResponse response = command.execute(connection, "/"); - Object mockValue = response.getProperty("mock"); - _useMockNcbi = Boolean.TRUE.equals(mockValue); - if (_useMockNcbi) - { - log("Using mock NCBI service" + (checkNcbiReachable ? 
" (NCBI not reachable)" : "")); - } - else - { - log("Using real NCBI service (NCBI is reachable)"); - } + command.execute(connection, "/"); + _useMockNcbi = true; + log("Using mock NCBI service"); + registerMockPublications(); } catch (IOException | CommandException e) { @@ -195,37 +264,107 @@ private void setupMockNcbiService() } } + private static boolean isNcbiReachable() + { + try + { + java.net.HttpURLConnection conn = (java.net.HttpURLConnection) + new java.net.URL("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi").openConnection(); + conn.setRequestMethod("GET"); + conn.setRequestProperty("User-Agent", "PanoramaPublic/1.0"); + conn.setConnectTimeout(5000); + conn.setReadTimeout(5000); + int responseCode = conn.getResponseCode(); + boolean reachable = responseCode >= 200 && responseCode < 300; + conn.disconnect(); + return reachable; + } + catch (IOException e) + { + return false; + } + } + /** - * POST to RestoreNcbiServiceAction to restore the real NCBI service. + * Register mock publication data with the mock NCBI service. */ - private void restoreNcbiService() + private void registerMockPublications() + { + // Dataset 1: PMID 23689285 (Abbatiello et al.) — found via PubMed fallback (author + title match) + // Paper does not reference PXD010535 in PMC. + registerMockPublication("pubmed", PMID_1, "Schilling", + null, ARTICLE_TITLE_1, + "Abbatiello SE,Mani DR,Schilling B,Maclean B,Zimmerman LJ,Carr SA", + "2013 Sep", "Mol Cell Proteomics", "Molecular & cellular proteomics : MCP", + CITATION_1); + + // Dataset 2: PMID 41198693 (Wen et al.) — found via PMC search by PXD ID + // PMC search for PXD056793 returns two articles: Nature + bioRxiv preprint. 
+ + // Nature article (published) — PMC ID 12592563, PMID 41198693 + registerMockPublication("pmc", PMC_ID_2, PXD_2, + PMID_2, ARTICLE_TITLE_2, + "Wen B,Hsu C,Shteynberg D,Zeng WF,Riffle M,Chang A,Mudge MC,Nunn BL,MacLean BX,Berg MD,Villén J,MacCoss MJ,Noble WS", + "2025 Nov 6", "Nat Commun", "Nature communications", + CITATION_2); + + // bioRxiv preprint — PMC ID 11507862, PMID 39463980 (will be filtered out by isPreprint) + registerMockPublication("pmc", PMC_ID_2_PREPRINT, PXD_2, + "39463980", ARTICLE_TITLE_2, + "Wen B,Hsu C,Shteynberg D,Zeng WF,Riffle M,Chang A,Mudge M,Nunn BL,MacLean BX,Berg MD,Villén J,MacCoss MJ,Noble WS", + "2025 Aug 4", "bioRxiv", "bioRxiv : the preprint server for biology", + null); + } + + /** + * Register a single mock article with the mock NCBI service via the RegisterMockPublicationAction API. + */ + private void registerMockPublication(String database, String id, String searchKey, + String pmid, String title, String authors, + String pubDate, String source, String journalFull, + String citation) { try { Connection connection = createDefaultConnection(); - SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "restoreNcbiService"); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "registerMockPublication"); + Map params = new HashMap<>(); + params.put("database", database); + params.put("id", id); + params.put("searchKey", searchKey); + params.put("title", title); + params.put("authors", authors); + params.put("pubDate", pubDate); + params.put("source", source); + params.put("journalFull", journalFull); + if (pmid != null) params.put("pmid", pmid); + if (citation != null) params.put("citation", citation); + command.setParameters(params); command.execute(connection, "/"); - log("Restored real NCBI service"); + log("Registered mock publication: " + database + " " + id); } catch (IOException | CommandException e) { - log("Warning: Failed to restore NCBI service: " + e.getMessage()); + fail("Failed to 
register mock publication: " + e.getMessage()); } } /** - * Assign a PXD ID to an experiment using the UpdatePxDetails page. - * Cannot use UpdateRowsCommand because the ExperimentAnnotations table is not updatable via the HTTP-based APIs. + * Restore the real NCBI service. */ - private void assignPxdId(String projectName, String folderName, int exptId, String pxdId) + private void restoreNcbiService() { - goToProjectFolder(projectName, folderName); - beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), - "updatePxDetails", Map.of("id", String.valueOf(exptId)))); - waitForText("Update ProteomeXchange Details"); - setFormElement(Locator.input("pxId"), pxdId); - clickButton("Update"); - log("Assigned PXD ID " + pxdId + " to experiment " + exptId); + try + { + Connection connection = createDefaultConnection(); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "restoreNcbiService"); + command.execute(connection, "/"); + log("Restored real NCBI service"); + } + catch (IOException | CommandException e) + { + log("Warning: Failed to restore NCBI service: " + e.getMessage()); + } } @Override @@ -236,9 +375,16 @@ protected void doCleanup(boolean afterTest) throws TestTimeoutException restoreNcbiService(); } - // TODO: Reset the reminder settings to not search for publications. 
+ if (_originalReminderSettings != null) + { + savePrivateDataReminderSettings( + _originalReminderSettings.get("extensionLength"), + _originalReminderSettings.get("delayUntilFirstReminder"), + _originalReminderSettings.get("reminderFrequency"), + Boolean.parseBoolean(_originalReminderSettings.get("enablePublicationSearch"))); + } - _userHelper.deleteUsers(false, SUBMITTER_USER, ADMIN_USER); + _userHelper.deleteUsers(false, SUBMITTER_1, SUBMITTER_2, ADMIN_USER); super.doCleanup(afterTest); } } From 5a24e09359fc87c781eaf6e1db2848f34da80ac6 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Wed, 4 Mar 2026 13:53:50 -0800 Subject: [PATCH 21/27] - Reset settings after test - Fixed citation retrieval through mock service - Fixed confirm messages --- .../PanoramaPublicController.java | 23 ++++++++++--------- .../MockNcbiPublicationSearchService.java | 12 +++++++--- .../panoramapublic/PublicationSearchTest.java | 13 +++++++---- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 1a8da271..b4cb0220 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10457,7 +10457,7 @@ public abstract static class UpdateDatasetStatusAction extends ConfirmAction Date: Wed, 4 Mar 2026 18:23:16 -0800 Subject: [PATCH 22/27] - Look for "sortpubdate" and "sortdate" for date parsing - Added "tool" as parameter to EUtils requests - Require at least two keywords in title for keyword match - Updated unit tests --- .../NcbiPublicationSearchServiceImpl.java | 338 +++++++++--------- 1 file changed, 178 insertions(+), 160 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java 
b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java index 7240b74c..0174fb16 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java @@ -40,7 +40,7 @@ import java.util.Comparator; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -74,7 +74,6 @@ public static void setInstance(NcbiPublicationSearchService impl) private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; - // NCBI Literature Citation Exporter endpoints private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=citation&id="; @@ -83,7 +82,10 @@ public static void setInstance(NcbiPublicationSearchService impl) private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec private static final int TIMEOUT_MS = 10000; // 10 seconds - private static final String NCBI_EMAIL = "panorama@proteinms.net"; + // NCBI suggests using the 'tool' and 'email' parameters on E-utilities URLs + // https://www.nlm.nih.gov/dataguide/eutilities/utilities.html + private static final String TOOL = "PanoramaPublic"; + private static final String EMAIL = "panorama@proteinms.net"; // Preprint indicators private static final String[] PREPRINT_INDICATORS = { @@ -94,6 +96,7 @@ public static void setInstance(NcbiPublicationSearchService impl) // Title keyword extraction private static final int MIN_KEYWORD_LENGTH = 3; private static final double KEYWORD_MATCH_THRESHOLD = 0.6; // 60% of 
keywords must match + private static final int MIN_KEYWORDS_FOR_MATCH = 2; // Stop words for title keyword matching. These words are not meaningful discriminators between papers. // Function words: articles, prepositions, conjunctions, auxiliary verbs, pronouns. @@ -101,11 +104,11 @@ public static void setInstance(NcbiPublicationSearchService impl) private static final Set TITLE_STOP_WORDS = Set.of( // Function words - "the", "this", "that", "these", "those", "its", // articles / determiners + "the", "this", "that", "these", "those", "its", // articles "for", "with", "from", "into", "upon", "via", "after", "about", // prepositions "between", "through", "across", "under", "over", "during", "and", "but", "than", // conjunctions - "are", "was", "were", "been", "have", "has", "can", "may", // auxiliary / modal verbs + "are", "was", "were", "been", "have", "has", "can", "may", // auxiliary verbs "our", "their", // pronouns "not", "all", "also", "both", "each", "how", "use", "used", // other function words "here", "well", "two", "one", "more", "most", "only", "such", "other", "which", @@ -123,6 +126,13 @@ private static Logger getLog(@Nullable Logger logger) @Override public @Nullable String getCitation(String publicationId, DB database) { + return getCitation(publicationId, database, null); + } + + public @Nullable String getCitation(String publicationId, DB database, @Nullable Logger logger) + { + Logger log = getLog(logger); + if (publicationId == null || !publicationId.matches(NcbiConstants.PUBMED_ID)) { return null; @@ -137,7 +147,6 @@ private static Logger getLog(@Nullable Logger logger) URL url = new URL(queryUrl); conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); - conn.setRequestProperty("User-Agent", "PanoramaPublic/1.0"); int status = conn.getResponseCode(); @@ -148,12 +157,12 @@ private static Logger getLog(@Nullable Logger logger) { response = IOUtils.toString(in, StandardCharsets.UTF_8); } - return parseCitation(response, 
publicationId, database); + return parseCitation(response, publicationId, database, log); } } catch (IOException e) { - LOG.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); + log.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); } finally { @@ -171,6 +180,12 @@ public Pair getPubMedLinkAndCitation(String pubmedId) static String parseCitation(String response, String publicationId, DB database) { + return parseCitation(response, publicationId, database, null); + } + + static String parseCitation(String response, String publicationId, DB database, @Nullable Logger logger) + { + Logger log = getLog(logger); try { var jsonObject = new JSONObject(response); @@ -179,7 +194,7 @@ static String parseCitation(String response, String publicationId, DB database) } catch (JSONException e) { - LOG.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); + log.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); } return null; } @@ -226,7 +241,7 @@ public List searchForPublication(@NotNull ExperimentAnnotation for (PublicationMatch match : matchedArticles) { rateLimit(); - match.setCitation(getCitation(match.getPublicationId(), match.getPublicationType())); + match.setCitation(getCitation(match.getPublicationId(), match.getPublicationType(), log)); } } @@ -239,75 +254,53 @@ public List searchForPublication(@NotNull ExperimentAnnotation */ private @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) { - // Track PMC IDs found by each strategy - Map> pmcIdsByStrategy = new HashMap<>(); + Map searchTermsByStrategy = buildSearchTerms(expAnnotations); - // Step 1: Search PMC by PX ID - if (!StringUtils.isBlank(expAnnotations.getPxid())) - { - log.debug("Searching PMC by PX ID: {}", expAnnotations.getPxid()); - List ids = 
searchPmc(quote(expAnnotations.getPxid()), log); - if (!ids.isEmpty()) - { - pmcIdsByStrategy.put(MATCH_PX_ID, ids); - log.debug("Found {} PMC articles by PX ID", ids.size()); - } - rateLimit(); - } - - // Step 2: Search PMC by Panorama URL - if (expAnnotations.getShortUrl() != null) - { - String panoramaUrl = expAnnotations.getShortUrl().renderShortURL(); - log.debug("Searching PMC by Panorama URL: {}", panoramaUrl); - List ids = searchPmc(quote(panoramaUrl), log); - if (!ids.isEmpty()) - { - pmcIdsByStrategy.put(MATCH_PANORAMA_URL, ids); - log.debug("Found {} PMC articles by Panorama URL", ids.size()); - } - rateLimit(); - } - - // Step 3: Search PMC by DOI - if (!StringUtils.isBlank(expAnnotations.getDoi())) + // Search PMC using each strategy, accumulating results + Map> pmcIdsByStrategy = new HashMap<>(); + for (Map.Entry entry : searchTermsByStrategy.entrySet()) { - String doiUrl = expAnnotations.getDoi().startsWith("http") - ? expAnnotations.getDoi() - : DataCiteService.toUrl(expAnnotations.getDoi()); - log.debug("Searching PMC by DOI: {}", doiUrl); - List ids = searchPmc(quote(doiUrl), log); + String strategy = entry.getKey(); + String searchTerm = entry.getValue(); + log.debug("Searching PMC by {}: {}", strategy, searchTerm); + List ids = searchPmc(quote(searchTerm), log); if (!ids.isEmpty()) { - pmcIdsByStrategy.put(MATCH_DOI, ids); - log.debug("Found {} PMC articles by DOI", ids.size()); + pmcIdsByStrategy.put(strategy, ids); + log.debug("Found {} PMC articles by {}", ids.size(), strategy); } rateLimit(); } - // Accumulate unique PMC IDs and track which strategies found each - Set uniquePmcIds = new HashSet<>(); + // Build reverse map: PMC ID -> strategies that found it Map> idToStrategies = new HashMap<>(); + pmcIdsByStrategy.forEach((strategy, ids) -> + ids.forEach(id -> idToStrategies.computeIfAbsent(id, k -> new ArrayList<>()).add(strategy))); - for (Map.Entry> entry : pmcIdsByStrategy.entrySet()) - { - String strategy = entry.getKey(); - for (String 
id : entry.getValue()) - { - uniquePmcIds.add(id); - idToStrategies.computeIfAbsent(id, k -> new ArrayList<>()).add(strategy); - } - } - - log.info("Total unique PMC IDs found: {}", uniquePmcIds.size()); + log.info("Total unique PMC IDs found: {}", idToStrategies.size()); // Fetch and verify PMC articles - List pmcArticles = fetchAndVerifyPmcArticles(uniquePmcIds, idToStrategies, expAnnotations, log); + List pmcArticles = fetchAndVerifyPmcArticles(idToStrategies.keySet(), idToStrategies, expAnnotations, log); // Apply priority filtering return applyPriorityFiltering(pmcArticles, expAnnotations.getCreated(), log); } + private @NotNull Map buildSearchTerms(@NotNull ExperimentAnnotations expAnnotations) + { + Map terms = new LinkedHashMap<>(); + + if (!StringUtils.isBlank(expAnnotations.getPxid())) + terms.put(MATCH_PX_ID, expAnnotations.getPxid()); + if (expAnnotations.getShortUrl() != null) + terms.put(MATCH_PANORAMA_URL, expAnnotations.getShortUrl().renderShortURL()); + if (!StringUtils.isBlank(expAnnotations.getDoi())) + terms.put(MATCH_DOI, expAnnotations.getDoi().startsWith("http") + ? 
expAnnotations.getDoi() + : DataCiteService.toUrl(expAnnotations.getDoi())); + return terms; + } + /** * Search PubMed Central with the given query */ @@ -335,7 +328,8 @@ private List executeSearch(String query, String database, Logger log) "&term=" + encodedQuery + "&retmax=" + NcbiPublicationSearchService.MAX_RESULTS + "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); + "&tool=" + TOOL + + "&email=" + URLEncoder.encode(EMAIL, StandardCharsets.UTF_8); try { @@ -352,7 +346,7 @@ private List executeSearch(String query, String database, Logger log) } catch (IOException | JSONException e) { - log.error("Error searching {} with query: {}", database, query, e); + log.error("Error searching " + database + " with query: " + query , e); return Collections.emptyList(); } } @@ -381,9 +375,8 @@ protected JSONObject getJson(String url) throws IOException .setConnectionManager(connectionManager) .build()) { - HttpGet get = new HttpGet(url); - get.setHeader("User-Agent", "PanoramaPublic/1.0"); - return client.execute(get, response -> { + HttpGet getRequest = new HttpGet(url); + return client.execute(getRequest, response -> { int status = response.getCode(); if (status < 200 || status >= 300) { @@ -423,7 +416,8 @@ private Map fetchMetadata(Collection ids, String dat "?db=" + database + "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + "&retmode=json" + - "&email=" + URLEncoder.encode(NCBI_EMAIL, StandardCharsets.UTF_8); + "&tool=" + TOOL + + "&email=" + URLEncoder.encode(EMAIL, StandardCharsets.UTF_8); try { @@ -556,17 +550,17 @@ static List applyPriorityFiltering(List arti } /** - * Sort articles by proximity of publication date to the reference date (experiment creation date). - * Articles with dates closest to the reference date come first. 
+ * Sort articles by proximity of publication date to the experiment creation date (proxy for data submission date) + * Articles with dates closest to the experiment creation date come first. * Articles without a publication date are sorted to the end. */ - static List sortByDateProximity(List articles, @NotNull Date referenceDate) + static List sortByDateProximity(List articles, @NotNull Date experimentDate) { if (articles.size() <= 1) { return articles; } - long refTime = referenceDate.getTime(); + long refTime = experimentDate.getTime(); articles = new ArrayList<>(articles); articles.sort(Comparator.comparingLong(a -> a.getPublicationDate() != null ? Math.abs(a.getPublicationDate().getTime() - refTime) : Long.MAX_VALUE @@ -702,7 +696,7 @@ static boolean checkAuthorMatch(JSONObject metadata, String firstName, String la String authorName = author.optString("name", "").toLowerCase(); - // Match format: "LastName FirstInitial" or "LastName F" + // Match format: "LastName FirstInitial" if (authorName.startsWith(lastNameLower)) { String afterLastName = authorName.substring(lastNameLower.length()).trim(); @@ -746,29 +740,31 @@ static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) } // Bi-directional keyword matching: match if either direction meets the threshold. - // This handles cases where one title is much more specific than the other. - return keywordsMatch(datasetTitle, normalizedArticleTitle) - || keywordsMatch(metadata.optString("title", metadata.optString("sorttitle", "")), normalizedDatasetTitle); + List articleKeywords = extractTitleKeywords(normalizedArticleTitle); + List datasetKeywords = extractTitleKeywords(normalizedDatasetTitle); + + return keywordsMatch(datasetKeywords, articleKeywords) + || keywordsMatch(articleKeywords, datasetKeywords); } /** - * Check if keywords extracted from {@code sourceTitle} are present in {@code normalizedTarget}. - * Returns true if at least 60% of keywords match (with a minimum of 2). 
+ * Check if at least 60% of {@code sourceKeywords} are present in {@code targetKeywords} (with a minimum of 2). */ - private static boolean keywordsMatch(String sourceTitle, String normalizedTarget) + private static boolean keywordsMatch(List sourceKeywords, List targetKeywords) { - List keywords = extractTitleKeywords(sourceTitle); - if (keywords.isEmpty()) + if (sourceKeywords.size() < MIN_KEYWORDS_FOR_MATCH || targetKeywords.size() < MIN_KEYWORDS_FOR_MATCH) { return false; } - long matchCount = keywords.stream() - .filter(keyword -> normalizedTarget.contains(keyword)) + Set targetSet = new java.util.HashSet<>(targetKeywords); + + long matchCount = sourceKeywords.stream() + .filter(targetSet::contains) .count(); - int required = (int) Math.ceil(keywords.size() * KEYWORD_MATCH_THRESHOLD); - required = Math.max(required, Math.min(2, keywords.size())); + int required = (int) Math.ceil(sourceKeywords.size() * KEYWORD_MATCH_THRESHOLD); + required = Math.max(required, Math.min(2, sourceKeywords.size())); return matchCount >= required; } @@ -838,27 +834,48 @@ static List extractTitleKeywords(String title) /** * Parse publication date from ESummary metadata. - * Tries "pubdate" first, then "epubdate". Dates are typically in "YYYY Mon DD" or "YYYY Mon" format. + * Tries "sortpubdate" or "sortdate" first (format "YYYY/MM/DD HH:MM"), then falls back to + * "pubdate" and "epubdate" (formats "YYYY Mon DD", "YYYY Mon", or "YYYY"). 
*/ static @Nullable Date parsePublicationDate(JSONObject metadata, Logger log) { - String dateStr = metadata.optString("pubdate", ""); - if (StringUtils.isBlank(dateStr)) + // Try sort dates first — these have a consistent "YYYY/MM/DD HH:MM" format + for (String field : new String[]{"sortpubdate", "sortdate"}) { - dateStr = metadata.optString("epubdate", ""); + String dateStr = metadata.optString(field, "").trim(); + if (!StringUtils.isBlank(dateStr)) + { + try + { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm"); + sdf.setLenient(false); + return sdf.parse(dateStr); + } + catch (ParseException ignored) + { + } + } + } + + // Fall back to pubdate / epubdate + String dateStr = ""; + for (String field : new String[]{"pubdate", "epubdate"}) + { + dateStr = metadata.optString(field, "").trim(); + if (!StringUtils.isBlank(dateStr)) break; } if (StringUtils.isBlank(dateStr)) { return null; } - // NCBI dates are typically "YYYY Mon DD", "YYYY Mon", or "YYYY" - String[] formats = {"yyyy MMM dd", "yyyy MMM", "yyyy"}; - for (String format : formats) + for (String format : new String[]{"yyyy MMM dd", "yyyy MMM", "yyyy"}) { try { - return new SimpleDateFormat(format).parse(dateStr); + SimpleDateFormat sdf = new SimpleDateFormat(format); + sdf.setLenient(false); + return sdf.parse(dateStr); } catch (ParseException ignored) { @@ -921,8 +938,8 @@ public void testIsPreprint() assertFalse(isPreprint(articleMetadata("J Proteome Res", "Journal of Proteome Research"))); // Preprint by source - assertTrue(isPreprint(articleMetadata("bioRxiv", "bioRxiv"))); assertTrue(isPreprint(articleMetadata("medRxiv", ""))); + assertTrue(isPreprint(articleMetadata("bioRxiv", "bioRxiv : the preprint server for biology"))); // Preprint by journal name assertTrue(isPreprint(articleMetadata("", "Research Square"))); @@ -940,27 +957,30 @@ public void testIsPreprint() @Test public void testCheckAuthorMatch() { - // Standard match: "Sharma V" matches firstName=Vagisha, lastName=Sharma - 
JSONObject metadata = metadataWithAuthors("Sharma V", "Jones AB", "Smith CD"); - assertTrue(checkAuthorMatch(metadata, "Vagisha", "Sharma")); + // Standard match: "Jones A" matches firstName=Andrew, lastName=Jones + JSONObject metadata = metadataWithAuthors("Smith CD", "Jones A", "Brown EF"); + assertTrue(checkAuthorMatch(metadata, "Andrew", "Jones")); + + // Match with multiple initials: "Jones AB" matches firstName=Andrew, lastName=Jones + assertTrue(checkAuthorMatch(metadataWithAuthors("Jones AB"), "Andrew", "Jones")); // Match is case-insensitive - assertTrue(checkAuthorMatch(metadataWithAuthors("sharma v"), "Vagisha", "Sharma")); - assertTrue(checkAuthorMatch(metadataWithAuthors("SHARMA V"), "vagisha", "sharma")); + assertTrue(checkAuthorMatch(metadataWithAuthors("jones a"), "Andrew", "Jones")); + assertTrue(checkAuthorMatch(metadataWithAuthors("JONES A"), "andrew", "jones")); + + // Author with full first name: "Jones Andrew" + assertTrue(checkAuthorMatch(metadataWithAuthors("Jones Andrew"), "Andrew", "Jones")); // No matching author - assertFalse(checkAuthorMatch(metadataWithAuthors("Jones AB", "Smith CD"), "Vagisha", "Sharma")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Smith CD", "Brown EF"), "Andrew", "Jones")); // Blank first or last name - assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "", "Sharma")); - assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), "Vagisha", "")); - assertFalse(checkAuthorMatch(metadataWithAuthors("Sharma V"), null, "Sharma")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones A"), "", "Jones")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones A"), "Andrew", "")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones A"), null, "Jones")); // Empty authors array - assertFalse(checkAuthorMatch(new JSONObject(), "Vagisha", "Sharma")); - - // Author with full first name: "Sharma Vagisha" - assertTrue(checkAuthorMatch(metadataWithAuthors("Sharma Vagisha"), "Vagisha", "Sharma")); 
+ assertFalse(checkAuthorMatch(new JSONObject(), "Andrew", "Jones")); } // -- checkTitleMatch tests -- @@ -969,59 +989,41 @@ public void testCheckAuthorMatch() public void testCheckTitleMatchExact() { // Exact match (case-insensitive, punctuation-stripped) - JSONObject metadata = metadataWithTitle("Quantitative Proteomics of Muscle Fibers"); - assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers")); - assertTrue(checkTitleMatch(metadata, "quantitative proteomics of muscle fibers")); - assertTrue(checkTitleMatch(metadata, "Quantitative Proteomics of Muscle Fibers!")); + JSONObject metadata = metadataWithTitle("Carafe in silico spectral library generation for DIA"); + assertTrue(checkTitleMatch(metadata, "Carafe in silico spectral library generation for DIA")); + assertTrue(checkTitleMatch(metadata, "carafe in silico spectral library generation for dia")); + assertTrue(checkTitleMatch(metadata, "Carafe: in silico spectral library generation for DIA!")); } @Test public void testCheckTitleMatchKeywords() { - // Article: "quantitative proteomics reveals muscle fibers composition" - // Dataset: "muscle fibers composition proteomics quantitative patterns" - // Dataset keywords: "muscle", "fibers", "composition", "proteomics", "quantitative", "patterns" (6 keywords) - // Matches in article: "muscle", "fibers", "composition", "proteomics", "quantitative" (5 of 6 = 83%) -> pass - JSONObject metadata = metadataWithTitle("A study of quantitative proteomics reveals muscle fibers composition"); - assertTrue(checkTitleMatch(metadata, "muscle fibers composition proteomics quantitative patterns")); - - // Keywords not present in article title — well below 60% - assertFalse(checkTitleMatch(metadata, "Novel cardiac lipids quantitation in mouse tissue")); - } - - @Test - public void testCheckTitleMatchThreshold() - { - // Article contains: "phosphoproteomics", "analysis", "lung", "cancer", "cell", "lines" - JSONObject metadata = 
metadataWithTitle("Phosphoproteomics analysis of lung cancer cell lines"); + // Article: "Carafe enables high quality in silico spectral library generation for data-independent acquisition proteomics" + // Dataset: "Carafe: a tool for in silico spectral library generation for DIA proteomics" + // Dataset keywords: "carafe", "tool", "silico", "spectral", "library", "generation", "dia", "proteomics" (8 keywords) + // Matches in article: "carafe", "silico", "spectral", "library", "generation", "proteomics" (6 of 8 = 73%) -> pass + JSONObject metadata = metadataWithTitle("Carafe enables high quality in silico spectral library generation for data-independent acquisition proteomics"); + assertTrue(checkTitleMatch(metadata, "Carafe: a tool for in silico spectral library generation for DIA proteomics")); - // 4 of 5 keywords match (80%) -> pass (threshold is 60%) - assertTrue(checkTitleMatch(metadata, "phosphoproteomics lung cancer cell biomarkers")); + // Keywords not present in article title — below 60% + assertFalse(checkTitleMatch(metadata, "Carafe for generating spectrum libraries from DIA data")); - // Only 2 of 5 keywords match (40%) -> fail - assertFalse(checkTitleMatch(metadata, "phosphoproteomics kidney heart liver cancer")); - } - - @Test - public void testCheckTitleMatchShortKeywords() - { - // Short but meaningful words (3-4 chars) should be extracted as keywords - JSONObject metadata = metadataWithTitle("DIA proteomics of lung cell iron metabolism in mice"); - assertTrue(checkTitleMatch(metadata, "DIA lung cell iron mice proteomics")); + // Dataset title tool short + assertFalse(checkTitleMatch(metadata, "Carafe")); } @Test public void testCheckTitleMatchBidirectional() { - // Forward direction fails: dataset is very specific, article is short. - // Dataset keywords: "comprehensive", "phosphoproteomics", "analysis", "novel", "biomarkers", "lung", "cancer", "cell", "lines" (9) + // Forward direction fails: dataset is very specific, article title is short. 
+ // Dataset keywords: "comprehensive", "phosphoproteomic", "analysis", "novel", "biomarkers", "lung", "cancer", "cell", "lines" (9) // Only "lung", "cancer" found in article (2 of 9 = 22%) -> forward fails // // Reverse direction passes: article keywords: "lung", "cancer", "proteomics" (3) - // All 3 found in dataset -> 100% -> reverse passes + // "lung" and "cancer" found in dataset (2 of 3 = 67%) -> passes 60% threshold JSONObject metadata = metadataWithTitle("Lung cancer proteomics"); assertTrue(checkTitleMatch(metadata, - "Comprehensive phosphoproteomics analysis reveals novel biomarkers in lung cancer cell lines")); + "Comprehensive phosphoproteomic analysis reveals novel biomarkers in lung cancer cell lines")); // Both directions fail — completely unrelated titles assertFalse(checkTitleMatch(metadataWithTitle("Cardiac tissue lipidomics"), @@ -1038,9 +1040,10 @@ public void testCheckTitleMatchEdgeCases() // Missing title in metadata assertFalse(checkTitleMatch(new JSONObject(), "Some title")); - // Dataset title with only stop words and short words below MIN_KEYWORD_LENGTH - // "a the and for with" -> all stop words or < 3 chars -> no keywords - assertFalse(checkTitleMatch(metadataWithTitle("Different title entirely"), "a the and for with")); + // Stop words in titles should be ignored — only meaningful keywords matter. 
+ // All words other than "Cats" and "Dogs" will be ignored + assertFalse(checkTitleMatch(metadataWithTitle("The Role of Cats: A study based on all other studies"), + "The Role of Dogs: A study based on all other studies")); } @Test @@ -1048,6 +1051,7 @@ public void testCheckTitleMatchSorttitleFallback() { // When "title" is missing, falls back to "sorttitle" JSONObject metadata = new JSONObject(); + metadata.put("title", ""); metadata.put("sorttitle", "phosphoproteomics of lung cancer"); assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); @@ -1150,7 +1154,21 @@ public void testExtractPubMedId() @Test public void testParsePublicationDate() { - // "YYYY Mon DD" format + // sortpubdate is preferred (format "YYYY/MM/DD HH:MM") + assertNotNull(parsePublicationDate(metadataWithDate("sortpubdate", "2025/11/06 00:00"), LOG)); + + // sortdate is next + assertNotNull(parsePublicationDate(metadataWithDate("sortdate", "2025/11/06 00:00"), LOG)); + + // sortpubdate takes priority over pubdate + JSONObject metadata = new JSONObject(); + metadata.put("sortpubdate", "2025/11/06 00:00"); + metadata.put("pubdate", "2024 Jan"); + Date parsed = parsePublicationDate(metadata, LOG); + assertNotNull(parsed); + assertTrue("sortpubdate should be used over pubdate", parsed.toString().contains("2025")); + + // Falls back to "pubdate" when sort dates are absent assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan 15"), LOG)); // "YYYY Mon" format @@ -1174,24 +1192,24 @@ public void testParsePublicationDate() @Test public void testApplyPriorityFiltering() { - Date refDate = new Date(); + Date articleDate = new Date(); // Single article returned as-is - PublicationMatch single = createMatch("111", true, false, false, false, false, refDate); - List result = applyPriorityFiltering(List.of(single), refDate, LOG); + PublicationMatch single = createMatch("111", true, false, false, false, false, articleDate); + List result = 
applyPriorityFiltering(List.of(single), articleDate, LOG); assertEquals(1, result.size()); // Articles found by multiple data IDs preferred over single-ID matches - PublicationMatch multiId = createMatch("222", true, true, false, true, true, refDate); - PublicationMatch singleId = createMatch("333", true, false, false, true, true, refDate); - result = applyPriorityFiltering(List.of(singleId, multiId), refDate, LOG); + PublicationMatch multiFieldMatch = createMatch("222", true, true, false, true, true, articleDate); + PublicationMatch singleFieldMatch = createMatch("333", true, false, false, true, true, articleDate); + result = applyPriorityFiltering(List.of(singleFieldMatch, multiFieldMatch), articleDate, LOG); assertEquals(1, result.size()); assertEquals("222", result.get(0).getPublicationId()); // Among single-ID matches, author+title both matching preferred - PublicationMatch bothMatch = createMatch("444", true, false, false, true, true, refDate); - PublicationMatch authorOnly = createMatch("555", true, false, false, true, false, refDate); - result = applyPriorityFiltering(List.of(authorOnly, bothMatch), refDate, LOG); + PublicationMatch bothMatch = createMatch("444", true, false, false, true, true, articleDate); + PublicationMatch authorOnly = createMatch("555", true, false, false, true, false, articleDate); + result = applyPriorityFiltering(List.of(authorOnly, bothMatch), articleDate, LOG); assertEquals(1, result.size()); assertEquals("444", result.get(0).getPublicationId()); } @@ -1199,15 +1217,15 @@ public void testApplyPriorityFiltering() @Test public void testSortByDateProximity() { - Date refDate = new Date(); - Date closer = new Date(refDate.getTime() - 86400000L); // 1 day before - Date farther = new Date(refDate.getTime() - 86400000L * 365); // 1 year before + Date exptSubmitDate = new Date(); + Date closer = new Date(exptSubmitDate.getTime() - 86400000L); // 1 day before + Date farther = new Date(exptSubmitDate.getTime() - 86400000L * 365); // 1 year 
before PublicationMatch farMatch = createMatch("111", true, false, false, false, false, farther); PublicationMatch closeMatch = createMatch("222", true, false, false, false, false, closer); PublicationMatch noDate = createMatch("333", true, false, false, false, false, null); - List result = sortByDateProximity(List.of(farMatch, noDate, closeMatch), refDate); + List result = sortByDateProximity(List.of(farMatch, noDate, closeMatch), exptSubmitDate); assertEquals("222", result.get(0).getPublicationId()); // closest assertEquals("111", result.get(1).getPublicationId()); // farther assertEquals("333", result.get(2).getPublicationId()); // no date last From d647fdbb115a01907ebfa5b6aea7ba44b41d0edd Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 5 Mar 2026 12:36:04 -0800 Subject: [PATCH 23/27] - Convert UserDismissedPublication from Boolean to Date to allow deferred re-search after dismissal - Re-search NCBI after deferral expires; clear dismissal if different publication found - Add publication search frequency (default 3 months) to admin UI and settings form - Update tests to match new log messages and Date-based dismissal --- .../panoramapublic-25.003-25.004.sql | 2 +- .../resources/schemas/panoramapublic.xml | 2 +- .../PanoramaPublicController.java | 31 +++++++-- .../message/PrivateDataReminderSettings.java | 28 ++++++++ .../panoramapublic/model/DatasetStatus.java | 6 +- .../pipeline/PrivateDataReminderJob.java | 68 +++++++++++++++---- .../view/privateDataRemindersSettingsForm.jsp | 13 ++++ .../PanoramaPublicBaseTest.java | 3 +- .../panoramapublic/PublicationSearchTest.java | 11 ++- 9 files changed, 137 insertions(+), 27 deletions(-) diff --git a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql index af421eba..2a4c320e 100644 --- a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql +++ 
b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql @@ -3,4 +3,4 @@ ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPublicationId VARCHAR(255); ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationType VARCHAR(50); ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationMatchInfo TEXT; -ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPublication BOOLEAN DEFAULT FALSE; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPublication TIMESTAMP; diff --git a/panoramapublic/resources/schemas/panoramapublic.xml b/panoramapublic/resources/schemas/panoramapublic.xml index 3ad59e8e..5c319c3d 100644 --- a/panoramapublic/resources/schemas/panoramapublic.xml +++ b/panoramapublic/resources/schemas/panoramapublic.xml @@ -874,7 +874,7 @@ Information about which fields (e.g. PXD, Panorama link, title etc.) matched - User indicated the found publication is not related to their data + Date when the user dismissed the publication suggestion for this dataset
SelectPublication IDPublication MatchesUser Dismissed<%=h(match.getPublicationLabel())%> + <%=simpleLink(match.getCitation() != null ? match.getCitation() : match.getPublicationUrl(), match.getPublicationUrl()).target("_blank")%> + <%=h(match.getMatchInfo())%><%=h(pubId.equals(bean.getDismissedPubId()) ? "Yes" : "")%>
diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index b4cb0220..36ba6cc7 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10118,6 +10118,14 @@ else if (form.getExtensionLength() < 0) { errors.reject(ERROR_MSG, "Value for 'Extension duration' cannot be less than 0."); } + if (form.getPublicationSearchFrequency() == null) + { + errors.reject(ERROR_MSG, "Please enter a value for 'Publication search frequency'."); + } + else if (form.getPublicationSearchFrequency() < 1) + { + errors.reject(ERROR_MSG, "Value for 'Publication search frequency' must be at least 1."); + } if (form.getReminderTime() == null) { errors.reject(ERROR_MSG, "Please enter a value for 'Reminder time'."); @@ -10141,6 +10149,7 @@ public ModelAndView getView(PrivateDataReminderSettingsForm form, boolean reshow form.setReminderFrequency(settings.getReminderFrequency()); form.setExtensionLength(settings.getExtensionLength()); form.setEnablePublicationSearch(settings.isEnablePublicationSearch()); + form.setPublicationSearchFrequency(settings.getPublicationSearchFrequency()); } VBox view = new VBox(); @@ -10160,6 +10169,7 @@ public boolean handlePost(PrivateDataReminderSettingsForm form, BindException er settings.setReminderFrequency(form.getReminderFrequency()); settings.setExtensionLength(form.getExtensionLength()); settings.setEnablePublicationSearch(form.isEnablePublicationSearch()); + settings.setPublicationSearchFrequency(form.getPublicationSearchFrequency()); PrivateDataReminderSettings.save(settings); PrivateDataMessageScheduler.getInstance().initialize(settings.isEnableReminders()); @@ -10198,6 +10208,7 @@ public static class PrivateDataReminderSettingsForm private Integer _reminderFrequency; private Integer _delayUntilFirstReminder; private boolean 
_enablePublicationSearch; + private Integer _publicationSearchFrequency; public boolean isEnabled() { @@ -10258,6 +10269,16 @@ public void setEnablePublicationSearch(boolean enablePublicationSearch) { _enablePublicationSearch = enablePublicationSearch; } + + public Integer getPublicationSearchFrequency() + { + return _publicationSearchFrequency; + } + + public void setPublicationSearchFrequency(Integer publicationSearchFrequency) + { + _publicationSearchFrequency = publicationSearchFrequency; + } } @RequiresPermission(AdminOperationsPermission.class) @@ -10678,7 +10699,7 @@ protected void doValidationForAction(Errors errors) { errors.reject(ERROR_MSG, "No publication suggestion exists for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL()); } - else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPublication())) + else if (_datasetStatus.getUserDismissedPublication() != null) { errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() + " has already been dismissed"); @@ -10697,7 +10718,7 @@ else if (Boolean.TRUE.equals(_datasetStatus.getUserDismissedPublication())) @Override protected void updateDatasetStatus(DatasetStatus datasetStatus) { - datasetStatus.setUserDismissedPublication(true); + datasetStatus.setUserDismissedPublication(new Date()); } @Override @@ -10753,7 +10774,7 @@ public ModelAndView getView(ExperimentIdForm form, BindException errors) // Check if any displayed match was dismissed by the user boolean showDismissedColumn = false; String dismissedPubId = null; - if (datasetStatus != null && Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication()) + if (datasetStatus != null && datasetStatus.getUserDismissedPublication() != null && datasetStatus.getPotentialPublicationId() != null) { dismissedPubId = datasetStatus.getPotentialPublicationId(); @@ -10926,7 +10947,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException 
errors) throws // Check if the user has already dismissed this publication suggestion DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); - if (datasetStatus != null && Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication()) + if (datasetStatus != null && datasetStatus.getUserDismissedPublication() != null && form.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) { errors.reject(ERROR_MSG, "The user has already dismissed the publication suggestion " @@ -10995,7 +11016,7 @@ journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, datasetStatus.setPotentialPublicationId(form.getPublicationId()); datasetStatus.setPublicationType(pubType.name()); datasetStatus.setPublicationMatchInfo(form.getMatchInfo()); - datasetStatus.setUserDismissedPublication(false); + datasetStatus.setUserDismissedPublication(null); datasetStatus.setLastReminderDate(new Date()); if (datasetStatus.getId() == 0) diff --git a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java index 124e8398..4837c071 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java +++ b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java @@ -25,6 +25,7 @@ public class PrivateDataReminderSettings public static final String PROP_REMINDER_FREQUENCY = "Reminder frequency (months)"; public static final String PROP_EXTENSION_LENGTH = "Extension duration (months)"; public static final String PROP_ENABLE_PUBLICATION_SEARCH = "Enable publication search"; + public static final String PROP_PUBLICATION_SEARCH_FREQUENCY = "Publication search frequency (months)"; private static final boolean DEFAULT_ENABLE_REMINDERS = false; public static final String DEFAULT_REMINDER_TIME = "8:00 AM"; @@ -32,6 +33,7 @@ public class PrivateDataReminderSettings private static final 
int DEFAULT_REMINDER_FREQUENCY = 1; // Send reminders once a month, unless extension or deletion was requested. private static final int DEFAULT_EXTENSION_LENGTH = 6; // Private status of a dataset can be extended by 6 months. private static final boolean DEFAULT_ENABLE_PUBLICATION_SEARCH = false; + private static final int DEFAULT_PUBLICATION_SEARCH_FREQUENCY = 3; // Re-search every 3 months after dismissal public static final String DATE_FORMAT_PATTERN = "MMMM d, yyyy"; public static final String REMINDER_TIME_FORMAT = "h:mm a"; @@ -43,6 +45,7 @@ public class PrivateDataReminderSettings private int _reminderFrequency; private int _extensionLength; private boolean _enablePublicationSearch; + private int _publicationSearchFrequency; public static PrivateDataReminderSettings get() { @@ -78,6 +81,11 @@ public static PrivateDataReminderSettings get() ? DEFAULT_ENABLE_PUBLICATION_SEARCH : Boolean.valueOf(settingsMap.get(PROP_ENABLE_PUBLICATION_SEARCH)); settings.setEnablePublicationSearch(enablePublicationCheck); + + int publicationSearchFrequency = settingsMap.get(PROP_PUBLICATION_SEARCH_FREQUENCY) == null + ? 
DEFAULT_PUBLICATION_SEARCH_FREQUENCY + : Integer.valueOf(settingsMap.get(PROP_PUBLICATION_SEARCH_FREQUENCY)); + settings.setPublicationSearchFrequency(publicationSearchFrequency); } else { @@ -87,6 +95,7 @@ public static PrivateDataReminderSettings get() settings.setExtensionLength(DEFAULT_EXTENSION_LENGTH); settings.setReminderTime(parseReminderTime(DEFAULT_REMINDER_TIME)); settings.setEnablePublicationSearch(DEFAULT_ENABLE_PUBLICATION_SEARCH); + settings.setPublicationSearchFrequency(DEFAULT_PUBLICATION_SEARCH_FREQUENCY); } return settings; @@ -122,6 +131,7 @@ public static void save(PrivateDataReminderSettings settings) settingsMap.put(PROP_EXTENSION_LENGTH, String.valueOf(settings.getExtensionLength())); settingsMap.put(PROP_REMINDER_TIME, settings.getReminderTimeFormatted()); settingsMap.put(PROP_ENABLE_PUBLICATION_SEARCH, String.valueOf(settings.isEnablePublicationSearch())); + settingsMap.put(PROP_PUBLICATION_SEARCH_FREQUENCY, String.valueOf(settings.getPublicationSearchFrequency())); settingsMap.save(); } @@ -190,6 +200,16 @@ public void setEnablePublicationSearch(boolean enablePublicationSearch) _enablePublicationSearch = enablePublicationSearch; } + public int getPublicationSearchFrequency() + { + return _publicationSearchFrequency; + } + + public void setPublicationSearchFrequency(int publicationSearchFrequency) + { + _publicationSearchFrequency = publicationSearchFrequency; + } + public @Nullable Date getReminderValidUntilDate(@NotNull DatasetStatus status) { return status.getLastReminderDate() == null ? 
null : addMonths(status.getLastReminderDate(), getReminderFrequency()); @@ -210,6 +230,14 @@ public boolean isExtensionValid(@NotNull DatasetStatus status) return isDateInFuture(getExtensionValidUntilDate(status)); } + public boolean isPublicationDismissalRecent(@NotNull DatasetStatus status) + { + Date dismissed = status.getUserDismissedPublication(); + if (dismissed == null) return false; + Date searchDeferralEnd = addMonths(dismissed, getPublicationSearchFrequency()); + return isDateInFuture(searchDeferralEnd); + } + public @Nullable String extensionValidUntilFormatted(@NotNull DatasetStatus status) { Date date = getExtensionValidUntilDate(status); diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 00be1a53..bf4bdec2 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -16,7 +16,7 @@ public class DatasetStatus extends DbEntity private String _potentialPublicationId; private String _publicationType; private String _publicationMatchInfo; - private Boolean _userDismissedPublication; + private Date _userDismissedPublication; public int getExperimentAnnotationsId() { @@ -115,12 +115,12 @@ public void setPublicationMatchInfo(String publicationMatchInfo) _publicationMatchInfo = publicationMatchInfo; } - public Boolean getUserDismissedPublication() + public Date getUserDismissedPublication() { return _userDismissedPublication; } - public void setUserDismissedPublication(Boolean userDismissedPublication) + public void setUserDismissedPublication(Date userDismissedPublication) { _userDismissedPublication = userDismissedPublication; } diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index 8aef9276..d2717ff3 100644 --- 
a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -172,10 +172,7 @@ public static ReminderDecision skip(String reason) { public boolean shouldPost() { return shouldPost; } public String getReason() { return reason; } } - - /** - * Container for publication search results used during reminder processing - */ + /** * Checks for publications associated with the experiment if enabled in settings * @param expAnnotations The experiment to check @@ -184,9 +181,10 @@ public static ReminderDecision skip(String reason) { * @param log Logger for diagnostic messages * @return NcbiArticleMatch with search result */ - private static PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, + private PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @NotNull PrivateDataReminderSettings settings, boolean forceCheck, + @NotNull User user, @NotNull Logger log) { // Check if publication checking is enabled (either globally or forced for this run) @@ -200,11 +198,46 @@ private static PublicationMatch searchForPublication(@NotNull ExperimentAnnotati DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(expAnnotations); if (datasetStatus != null) { - // If user has dismissed the publication suggestion, don't search again - if (Boolean.TRUE.equals(datasetStatus.getUserDismissedPublication())) + // If user has dismissed the publication suggestion, check search delay + Date dismissedDate = datasetStatus.getUserDismissedPublication(); + if (dismissedDate != null) { - log.info(String.format("User has dismissed publication suggestion for experiment %d; skipping search", expAnnotations.getId())); - return null; + if (settings.isPublicationDismissalRecent(datasetStatus)) + { + log.info(String.format("User dismissed publication for experiment %d on %s; Publication search deferred (%d months)", + 
expAnnotations.getId(), dismissedDate, settings.getPublicationSearchFrequency())); + return null; + } + + // Search deferral expired — re-search NCBI + log.info(String.format("Search deferral expired for experiment %d (dismissed %s); re-searching NCBI", + expAnnotations.getId(), dismissedDate)); + try + { + PublicationMatch newMatch = NcbiPublicationSearchService.get().searchForPublication(expAnnotations, log); + if (newMatch != null && !newMatch.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) + { + // Different publication found — return it (caller will save and notify) + log.info(String.format("New publication %s found for experiment %d (previously dismissed %s)", + newMatch.getPublicationId(), expAnnotations.getId(), datasetStatus.getPotentialPublicationId())); + return newMatch; + } + else + { + // Same publication or nothing found — update dismissal date to restart search delay + log.info(String.format("No new publication for experiment %d; resetting search delay", + expAnnotations.getId())); + datasetStatus.setUserDismissedPublication(new Date()); + DatasetStatusManager.update(datasetStatus, user); + return null; + } + } + catch (Exception e) + { + log.error(String.format("Error re-searching publication for experiment %d: %s", + expAnnotations.getId(), e.getMessage()), e); + return null; + } } // If we already have a cached publication ID, use it @@ -332,7 +365,7 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex } // Check for publications if enabled - PublicationMatch publicationResult = searchForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, processingResults._log); + PublicationMatch publicationResult = searchForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, getUser(), processingResults._log); if (!context.isTestMode()) { @@ -392,12 +425,17 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, @Nullable { 
datasetStatus.setLastReminderDate(new Date()); - // Save publication search results if found and not already cached - if (publicationResult != null && StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + // Update publication search results + if (publicationResult != null) { - datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); - datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); - datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); + // If this is a new/different publication, update it and clear dismissal + if (!publicationResult.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) + { + datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); + datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); + datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); + datasetStatus.setUserDismissedPublication(null); // Clear dismissal for new publication + } } DatasetStatusManager.update(datasetStatus, getUser()); diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp index 1567f026..3d974d35 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp @@ -157,6 +157,19 @@

+ + + <%=h(PrivateDataReminderSettings.PROP_PUBLICATION_SEARCH_FREQUENCY)%> + + + +
+ Number of months to wait after a user dismisses a publication suggestion before re-searching. +
+ If a different publication is found after this delay, the submitter will be notified again. +
+ + <%=button("Save").submit(true)%> <%=button("Cancel").href(panoramaPublicAdminUrl)%> diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java index a8a2efae..1af9f4c4 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java @@ -619,7 +619,7 @@ protected void verifyIsPublicColumn(String panoramaPublicProject, String experim /** * Navigate to the Private Data Reminder Settings page and read the current form values. - * Returns a map with keys: extensionLength, delayUntilFirstReminder, reminderFrequency, enablePublicationSearch. + * Returns a map with keys: extensionLength, delayUntilFirstReminder, reminderFrequency, enablePublicationSearch, publicationSearchFrequency. */ protected Map getPrivateDataReminderSettings() { @@ -632,6 +632,7 @@ protected Map getPrivateDataReminderSettings() settings.put("delayUntilFirstReminder", getFormElement(Locator.input("delayUntilFirstReminder"))); settings.put("reminderFrequency", getFormElement(Locator.input("reminderFrequency"))); settings.put("enablePublicationSearch", String.valueOf(Locator.checkboxByName("enablePublicationSearch").findElement(getDriver()).isSelected())); + settings.put("publicationSearchFrequency", getFormElement(Locator.input("publicationSearchFrequency"))); return settings; } diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java index 1b2cad44..470b0273 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java @@ -173,8 +173,17 @@ public void 
testPublicationSearchAndDismiss() goToProjectHome(panoramaPublicProject); goToDataPipeline(); goToDataPipeline().clickStatusLink(0); - String skipMessage = String.format("User has dismissed publication suggestion for experiment %d; skipping search", exptId1); + String skipMessage = String.format("User dismissed publication for experiment %d", exptId1); assertTextPresent(skipMessage); + assertTextPresent("Publication search deferred"); + + // Verify that the reminder for the dismissed dataset 1 is about making data public, not about a publication. + // The thread already has a "Publication Found" message from Step 6 (before dismissal), + // so we check that a "Status Update" message was also posted by the pipeline job after the "Publication Found" message. + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + waitForText("Submitted - " + shortAccessUrl1); + assertTextPresentInThisOrder("Action Required: Publication Found for Your Data on Panorama Public", + "Action Required: Status Update for Your Private Data on Panorama Public"); // Step 10: Verify that the pipeline job found a publication for dataset 2 and posted a message goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_2); From 686a368d4a019c236cc5a4569a396ef72de64e24 Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 5 Mar 2026 13:47:45 -0800 Subject: [PATCH 24/27] In DismissPublicationSuggestionAction's confirm view, if there is an exception fetching the citation from NCBI, display the publication Id label instead. 
--- .../PanoramaPublicController.java | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 36ba6cc7..9d6f94f7 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -10679,6 +10679,7 @@ protected String getConfirmViewTitle() @Override protected Renderable getConfirmViewMessage() { + fetchCitation(); String publicationRef = _publicationMatch.getCitation() != null ? _publicationMatch.getCitation() : _publicationMatch.getPublicationIdLabel(); @@ -10704,13 +10705,24 @@ else if (_datasetStatus.getUserDismissedPublication() != null) errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() + " has already been dismissed"); } - else + } + + // Fetch the citation from NCBI. If NCBI is unavailable, the publication ID label will be used as a fallback. + private void fetchCitation() + { + if (_publicationMatch != null && _publicationMatch.getCitation() == null) { - // Fetch the citation from NCBI. If we cannot retrieve it, the publication ID will be used as a fallback. 
- String citation = NcbiPublicationSearchService.get().getCitation(_publicationMatch.getPublicationId(), _publicationMatch.getPublicationType()); - if (!StringUtils.isBlank(citation)) + try + { + String citation = NcbiPublicationSearchService.get().getCitation(_publicationMatch.getPublicationId(), _publicationMatch.getPublicationType()); + if (!StringUtils.isBlank(citation)) + { + _publicationMatch.setCitation(citation); + } + } + catch (Exception e) { - _publicationMatch.setCitation(citation); + LOG.warn("Failed to fetch citation for " + _publicationMatch.getPublicationIdLabel() + ": " + e.getMessage()); } } } @@ -10724,6 +10736,7 @@ protected void updateDatasetStatus(DatasetStatus datasetStatus) @Override protected void postNotification() { + fetchCitation(); // Post a message to the support thread. JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(_exptAnnotations); Journal journal = JournalManager.getJournal(submission.getJournalId()); From c76ca1fca975a9ba30c61ed522ce44c0fa63fadd Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Thu, 5 Mar 2026 14:00:02 -0800 Subject: [PATCH 25/27] - Added volatile keyword to NcbiPublicationSearchService _instance to prevent stale reads - Fixed comment for PUBMED_ID regex - In searchPublicationsForDataset.jsp display the publication Id label when citation is not available. 
--- .../src/org/labkey/panoramapublic/ncbi/NcbiConstants.java | 2 +- .../panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java | 2 +- .../labkey/panoramapublic/view/searchPublicationsForDataset.jsp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java index 76f22c1c..8647973d 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java @@ -4,7 +4,7 @@ public class NcbiConstants { - /** Regex for validating numeric PubMed/PMC IDs (1-8 digits). */ + /** Regex for validating PubMed IDs (1-8 digits). Not used for PMC IDs which may be longer. */ public static final String PUBMED_ID = "^[0-9]{1,8}$"; // https://libguides.library.arizona.edu/c.php?g=406096&p=2779570 public enum DB diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java index 0174fb16..21597d46 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java @@ -58,7 +58,7 @@ public class NcbiPublicationSearchServiceImpl implements NcbiPublicationSearchSe private static final Logger LOG = LogHelper.getLogger(NcbiPublicationSearchServiceImpl.class, "Search NCBI for publications associated with Panorama Public datasets"); // Static holder for the service instance - private static NcbiPublicationSearchService _instance = new NcbiPublicationSearchServiceImpl(); + private static volatile NcbiPublicationSearchService _instance = new NcbiPublicationSearchServiceImpl(); public static NcbiPublicationSearchService getInstance() { diff --git 
a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp index eea30be6..b7c75723 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp @@ -100,7 +100,7 @@ data-matchinfo="<%=h(match.getMatchInfo())%>" /> - <%=simpleLink(match.getCitation() != null ? match.getCitation() : match.getPublicationUrl(), match.getPublicationUrl()).target("_blank")%> + <%=simpleLink(match.getCitation() != null ? match.getCitation() : match.getPublicationIdLabel(), match.getPublicationUrl()).target("_blank")%> <%=h(match.getMatchInfo())%> <% if (bean.isShowDismissedColumn()) { %> From 705b09ce21f4c5f630eebb3348ad69e7d81c34ca Mon Sep 17 00:00:00 2001 From: Vagisha Sharma Date: Fri, 6 Mar 2026 13:20:27 -0800 Subject: [PATCH 26/27] - Added a citation column to the DatasetStatus table so that we don't have to lookup the citation for a saved publicationId. - Added test: running the pipeline job in "test" mode should not update rows in the DatasetStatus table - Updated PublicationMatch constructor and fromMatchInfo method - Fixed comments, renamed variables, class. 
--- .../panoramapublic-25.003-25.004.sql | 1 + .../resources/schemas/panoramapublic.xml | 7 +- .../PanoramaPublicController.java | 153 +++++++++--------- .../panoramapublic/model/DatasetStatus.java | 18 ++- .../MockNcbiPublicationSearchService.java | 7 +- .../ncbi/NcbiPublicationSearchService.java | 4 + .../panoramapublic/ncbi/PublicationMatch.java | 32 ++-- .../pipeline/PrivateDataReminderJob.java | 38 +++-- .../view/searchPublicationsForDataset.jsp | 2 +- .../view/searchPublicationsForm.jsp | 5 +- .../panoramapublic/PublicationSearchTest.java | 118 +++++++++++--- 11 files changed, 259 insertions(+), 126 deletions(-) diff --git a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql index 2a4c320e..e8ecb041 100644 --- a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql +++ b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql @@ -3,4 +3,5 @@ ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPublicationId VARCHAR(255); ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationType VARCHAR(50); ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationMatchInfo TEXT; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN Citation TEXT; ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPublication TIMESTAMP; diff --git a/panoramapublic/resources/schemas/panoramapublic.xml b/panoramapublic/resources/schemas/panoramapublic.xml index 5c319c3d..e2753337 100644 --- a/panoramapublic/resources/schemas/panoramapublic.xml +++ b/panoramapublic/resources/schemas/panoramapublic.xml @@ -865,14 +865,17 @@ - Cached publication ID (PubMed ID, PMC ID etc.) + Publication ID (PubMed ID, PMC ID) suggestion based on NCBI search - Type of publication ID: PMID, PMC, etc. + Type of publication ID: PMID, PMC Information about which fields (e.g. 
PXD, Panorama link, title etc.) matched + + NLM citation string for the potential publication match + Date when the user dismissed the publication suggestion for this dataset diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 9d6f94f7..dbe93cd2 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -159,6 +159,7 @@ import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchServiceImpl; import org.labkey.panoramapublic.ncbi.PublicationMatch; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import org.labkey.panoramapublic.model.CatalogEntry; import org.labkey.panoramapublic.model.DataLicense; import org.labkey.panoramapublic.model.DatasetStatus; @@ -183,7 +184,6 @@ import org.labkey.panoramapublic.proteomexchange.ChemElement; import org.labkey.panoramapublic.proteomexchange.ExperimentModificationGetter; import org.labkey.panoramapublic.proteomexchange.Formula; -import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import org.labkey.panoramapublic.proteomexchange.NcbiUtils; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeServiceException; @@ -10300,6 +10300,7 @@ public ModelAndView getView(PrivateDataSendReminderForm form, boolean reshow, Bi // Initialize form with current settings on first view if (!reshow) { + // Check the "Search for Publications" box if enabled in the saved settings PrivateDataReminderSettings settings = PrivateDataReminderSettings.get(); form.setSearchPublications(settings.isEnablePublicationSearch()); } @@ -10670,6 +10671,22 @@ public static class DismissPublicationSuggestionAction extends UpdateDatasetStat { private PublicationMatch 
_publicationMatch; + @Override + protected void doValidationForAction(Errors errors) + { + _publicationMatch = PublicationMatch.fromDatasetStatus(_datasetStatus); + + if (_publicationMatch == null) + { + errors.reject(ERROR_MSG, "No publication suggestion exists for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL()); + } + else if (_datasetStatus.getUserDismissedPublication() != null) + { + errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() + + " has already been dismissed"); + } + } + @Override protected String getConfirmViewTitle() { @@ -10691,22 +10708,6 @@ protected Renderable getConfirmViewMessage() publicationRef); } - @Override - protected void doValidationForAction(Errors errors) - { - _publicationMatch = PublicationMatch.fromDatasetStatus(_datasetStatus); - - if (_publicationMatch == null) - { - errors.reject(ERROR_MSG, "No publication suggestion exists for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL()); - } - else if (_datasetStatus.getUserDismissedPublication() != null) - { - errors.reject(ERROR_MSG, "The publication suggestion for the data with short URL " + _exptAnnotations.getShortUrl().renderShortURL() - + " has already been dismissed"); - } - } - // Fetch the citation from NCBI. If NCBI is unavailable, the publication ID label will be used as a fallback. 
private void fetchCitation() { @@ -10756,6 +10757,21 @@ public ModelAndView getSuccessView(ShortUrlForm shortUrlForm) } } + public static class ShortUrlForm + { + private String _shortUrlEntityId; + + public String getShortUrlEntityId() + { + return _shortUrlEntityId; + } + + public void setShortUrlEntityId(String shortUrlEntityId) + { + _shortUrlEntityId = shortUrlEntityId; + } + } + @RequiresPermission(AdminOperationsPermission.class) public static class SearchPublicationsForDatasetAction extends SimpleViewAction { @@ -10785,21 +10801,17 @@ public ModelAndView getView(ExperimentIdForm form, BindException errors) DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(_exptAnnotations); // Check if any displayed match was dismissed by the user - boolean showDismissedColumn = false; + String dismissedPubId = null; - if (datasetStatus != null && datasetStatus.getUserDismissedPublication() != null - && datasetStatus.getPotentialPublicationId() != null) + if (datasetStatus != null) { - dismissedPubId = datasetStatus.getPotentialPublicationId(); - for (PublicationMatch match : matches) - { - if (match.getPublicationId().equals(dismissedPubId)) - { - showDismissedColumn = true; - break; - } - } + dismissedPubId = matches.stream() + .map(PublicationMatch::getPublicationId) + .filter(datasetStatus::isPublicationDismissed) + .findFirst() + .orElse(null); } + boolean showDismissedColumn = dismissedPubId != null; SearchPublicationsForDatasetBean bean = new SearchPublicationsForDatasetBean(_exptAnnotations, matches, showDismissedColumn, dismissedPubId); JspView jspView = new JspView<>("/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp", bean, errors); @@ -10891,46 +10903,8 @@ public Object execute(ExperimentIdForm form, BindException errors) } } - public static class NotifySubmitterForm extends IdForm - { - private String _publicationId; - private String _publicationType; - private String _matchInfo; - - public String getPublicationId() - { - return 
_publicationId; - } - - public void setPublicationId(String publicationId) - { - _publicationId = publicationId; - } - - public String getPublicationType() - { - return _publicationType; - } - - public void setPublicationType(String publicationType) - { - _publicationType = publicationType; - } - - public String getMatchInfo() - { - return _matchInfo; - } - - public void setMatchInfo(String matchInfo) - { - _matchInfo = matchInfo; - } - - } - @RequiresPermission(AdminOperationsPermission.class) - public static class NotifySubmitterOfPublicationsAction extends FormHandlerAction + public static class NotifySubmitterOfPublicationAction extends FormHandlerAction { private Container _announcementsContainer; private Announcement _announcement; @@ -10960,8 +10934,7 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws // Check if the user has already dismissed this publication suggestion DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); - if (datasetStatus != null && datasetStatus.getUserDismissedPublication() != null - && form.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) + if (datasetStatus != null && datasetStatus.isPublicationDismissed(form.getPublicationId())) { errors.reject(ERROR_MSG, "The user has already dismissed the publication suggestion " + datasetStatus.getPublicationIdLabel() @@ -10977,8 +10950,8 @@ public boolean handlePost(NotifySubmitterForm form, BindException errors) throws } // Reconstruct PublicationMatch from form fields - PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, form.getMatchInfo()); - selectedMatch.setCitation(NcbiPublicationSearchService.get().getCitation(form.getPublicationId(), pubType)); + String citation = NcbiPublicationSearchService.get().getCitation(form.getPublicationId(), pubType); + PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, 
form.getMatchInfo(), citation); // Post notification JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(exptAnnotations); @@ -11029,6 +11002,7 @@ journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, datasetStatus.setPotentialPublicationId(form.getPublicationId()); datasetStatus.setPublicationType(pubType.name()); datasetStatus.setPublicationMatchInfo(form.getMatchInfo()); + datasetStatus.setCitation(selectedMatch.getCitation()); datasetStatus.setUserDismissedPublication(null); datasetStatus.setLastReminderDate(new Date()); @@ -11052,19 +11026,42 @@ public ActionURL getSuccessURL(NotifySubmitterForm form) } } - public static class ShortUrlForm + public static class NotifySubmitterForm extends IdForm { - private String _shortUrlEntityId; + private String _publicationId; + private String _publicationType; + private String _matchInfo; - public String getShortUrlEntityId() + public String getPublicationId() { - return _shortUrlEntityId; + return _publicationId; } - public void setShortUrlEntityId(String shortUrlEntityId) + public void setPublicationId(String publicationId) { - _shortUrlEntityId = shortUrlEntityId; + _publicationId = publicationId; + } + + public String getPublicationType() + { + return _publicationType; } + + public void setPublicationType(String publicationType) + { + _publicationType = publicationType; + } + + public String getMatchInfo() + { + return _matchInfo; + } + + public void setMatchInfo(String matchInfo) + { + _matchInfo = matchInfo; + } + } private static ExperimentAnnotations getValidExperimentAnnotations(ShortUrlForm shortUrlForm, Errors errors) diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index bf4bdec2..1a295efc 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -16,6 +16,7 @@ 
public class DatasetStatus extends DbEntity private String _potentialPublicationId; private String _publicationType; private String _publicationMatchInfo; + private String _citation; private Date _userDismissedPublication; public int getExperimentAnnotationsId() @@ -101,7 +102,7 @@ public void setPublicationType(String publicationType) public String getPublicationIdLabel() { NcbiConstants.DB type = NcbiConstants.DB.fromString(_publicationType); - String label = type == null ? (_publicationType != null ? _publicationType : "") : type.getLabel(); + String label = type == null ? (_publicationType != null ? _publicationType : "") : type.name(); return label + " ID " + getPotentialPublicationId(); } @@ -115,6 +116,16 @@ public void setPublicationMatchInfo(String publicationMatchInfo) _publicationMatchInfo = publicationMatchInfo; } + public String getCitation() + { + return _citation; + } + + public void setCitation(String citation) + { + _citation = citation; + } + public Date getUserDismissedPublication() { return _userDismissedPublication; @@ -124,4 +135,9 @@ public void setUserDismissedPublication(Date userDismissedPublication) { _userDismissedPublication = userDismissedPublication; } + + public boolean isPublicationDismissed(String publicationId) + { + return getUserDismissedPublication() != null && publicationId != null && publicationId.equals(getPotentialPublicationId()); + } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java index acf5cde5..47df3d4e 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java @@ -46,7 +46,8 @@ public class MockNcbiPublicationSearchService extends NcbiPublicationSearchServi * Null for pubmed articles (where {@code id} is already the PMID). 
* @param title article title * @param authors comma-separated author list (e.g. "Abbatiello SE,Mani DR,Schilling B") - * @param pubDate publication date string (e.g. "2013 Sep") + * @param pubDate publication date in "YYYY/MM/DD HH:MM" format, matching the NCBI ESummary + * sortpubdate (PubMed) or sortdate (PMC) field that parsePublicationDate() checks first * @param source journal abbreviation used as ESummary "source" field (e.g. "Mol Cell Proteomics", "bioRxiv") * @param journalFull full journal name used as ESummary "fulljournalname" field * @param citation NLM citation string (for getCitation, keyed by PMID; null if not applicable) @@ -64,7 +65,9 @@ public void register(String database, String id, String searchKey, metadata.put("sorttitle", title.toLowerCase()); metadata.put("source", source); metadata.put("fulljournalname", journalFull); - metadata.put("pubdate", pubDate); + // Real NCBI ESummary returns sortpubdate (PubMed) or sortdate (PMC) in "YYYY/MM/DD HH:MM" format. + // parsePublicationDate() checks these first before falling back to pubdate. + metadata.put(isPmc ? "sortdate" : "sortpubdate", pubDate); // Authors array JSONArray authorsArray = new JSONArray(); diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java index d01e45f2..6256596b 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -25,6 +25,10 @@ static NcbiPublicationSearchService get() @Nullable Pair getPubMedLinkAndCitation(String pubmedId); + /** + * Searches PMC and PubMed for a publication associated with the experiment. + * Returns the top match (highest priority) if multiple matches are found, or null if none. 
+ */ @Nullable PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger); List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations); diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java index 4ba8e9ac..08f10954 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -11,9 +11,6 @@ import java.util.Date; import java.util.List; -/** - * Container for publication search article match - */ public class PublicationMatch { public static final String MATCH_PX_ID = "ProteomeXchange ID"; @@ -36,7 +33,7 @@ public class PublicationMatch public PublicationMatch(String publicationId, DB publicationType, boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, boolean matchesAuthor, boolean matchesTitle, - @Nullable Date publicationDate) + @Nullable Date publicationDate, @Nullable String citation) { _publicationId = publicationId; _publicationType = publicationType; @@ -46,6 +43,15 @@ public PublicationMatch(String publicationId, DB publicationType, _matchesAuthor = matchesAuthor; _matchesTitle = matchesTitle; _publicationDate = publicationDate; + _citation = citation; + } + + public PublicationMatch(String publicationId, DB publicationType, + boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, + boolean matchesAuthor, boolean matchesTitle, + @Nullable Date publicationDate) + { + this(publicationId, publicationType, matchesProteomeXchangeId, matchesPanoramaUrl, matchesDoi, matchesAuthor, matchesTitle, publicationDate, null); } public String getPublicationId() @@ -100,7 +106,7 @@ public void setCitation(@Nullable String citation) /** * String representation of what matched for this 
article. - * This is what gets stored in the PublicationMatchInfo database column. + * This is what gets stored in the DatasetStatus table's PublicationMatchInfo column. * Example: "ProteomeXchange ID, Panorama URL, Author, Title" */ public String getMatchInfo() @@ -144,10 +150,11 @@ public JSONObject toJson() } /** - * Build a PublicationMatch from a publication ID, type, and match info string. + * Build a PublicationMatch from a publication ID, type, citation, and match info string. * @see #getMatchInfo() */ - public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull DB publicationType, @Nullable String matchInfo) + public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull DB publicationType, + @Nullable String matchInfo, @Nullable String citation) { boolean pxId = false, url = false, doi = false, author = false, title = false; if (!StringUtils.isBlank(matchInfo)) @@ -165,7 +172,13 @@ public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @Not } } } - return new PublicationMatch(publicationId, publicationType, pxId, url, doi, author, title, null); + return new PublicationMatch(publicationId, publicationType, pxId, url, doi, author, title, null, citation); + } + + public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull DB publicationType, + @Nullable String matchInfo) + { + return fromMatchInfo(publicationId, publicationType, matchInfo, null); } /** @@ -182,6 +195,7 @@ public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @Not { return null; } - return fromMatchInfo(datasetStatus.getPotentialPublicationId(), type, datasetStatus.getPublicationMatchInfo()); + return fromMatchInfo(datasetStatus.getPotentialPublicationId(), type, + datasetStatus.getPublicationMatchInfo(), datasetStatus.getCitation()); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java 
b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index d2717ff3..6b64d9e1 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -185,6 +185,7 @@ private PublicationMatch searchForPublication(@NotNull ExperimentAnnotations exp @NotNull PrivateDataReminderSettings settings, boolean forceCheck, @NotNull User user, + boolean testMode, @NotNull Logger log) { // Check if publication checking is enabled (either globally or forced for this run) @@ -224,11 +225,18 @@ private PublicationMatch searchForPublication(@NotNull ExperimentAnnotations exp } else { - // Same publication or nothing found — update dismissal date to restart search delay - log.info(String.format("No new publication for experiment %d; resetting search delay", - expAnnotations.getId())); - datasetStatus.setUserDismissedPublication(new Date()); - DatasetStatusManager.update(datasetStatus, user); + // Same publication or nothing found — update dismissal date to restart search deferral + if (testMode) + { + log.info(String.format("TEST MODE: No new publication for experiment %d; Would reset search deferral", expAnnotations.getId())); + } + else + { + log.info(String.format("No new publication for experiment %d; resetting search deferral", + expAnnotations.getId())); + datasetStatus.setUserDismissedPublication(new Date()); + DatasetStatusManager.update(datasetStatus, user); + } return null; } } @@ -306,17 +314,21 @@ private void processExperiments(List expAnnotationIds, ProcessingContex log.info(String.format("Posting reminder message to: %d message threads.", expAnnotationIds.size())); Set exptIds = new HashSet<>(expAnnotationIds); - try (DbScope.Transaction transaction = PanoramaPublicManager.getSchema().getScope().ensureTransaction()) + if (_test) { - if (_test) + log.info("RUNNING IN TEST MODE - MESSAGES WILL NOT BE POSTED."); + } + for 
(Integer experimentAnnotationsId : exptIds) + { + try (DbScope.Transaction transaction = PanoramaPublicManager.getSchema().getScope().ensureTransaction()) { - log.info("RUNNING IN TEST MODE - MESSAGES WILL NOT BE POSTED."); + processExperiment(experimentAnnotationsId, context, processingResults); + transaction.commit(); } - for (Integer experimentAnnotationsId : exptIds) + catch (Exception e) { - processExperiment(experimentAnnotationsId, context, processingResults); + log.error(String.format("Error processing experiment %d: %s", experimentAnnotationsId, e.getMessage()), e); } - transaction.commit(); } processingResults.logResults(log); @@ -365,7 +377,7 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex } // Check for publications if enabled - PublicationMatch publicationResult = searchForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, getUser(), processingResults._log); + PublicationMatch publicationResult = searchForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, getUser(), context.isTestMode(), processingResults._log); if (!context.isTestMode()) { @@ -417,6 +429,7 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, @Nullable datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); + datasetStatus.setCitation(publicationResult.getCitation()); } DatasetStatusManager.save(datasetStatus, getUser()); @@ -434,6 +447,7 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations, @Nullable datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); + 
datasetStatus.setCitation(publicationResult.getCitation()); datasetStatus.setUserDismissedPublication(null); // Clear dismissal for new publication } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp index b7c75723..6f01ceae 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp @@ -17,7 +17,7 @@ ExperimentAnnotations experiment = bean.getExperimentAnnotations(); ShortURLRecord shortUrl = experiment.getShortUrl(); String submitterName = experiment.getSubmitterName(); - ActionURL postUrl = new ActionURL(PanoramaPublicController.NotifySubmitterOfPublicationsAction.class, getContainer()); + ActionURL postUrl = new ActionURL(PanoramaPublicController.NotifySubmitterOfPublicationAction.class, getContainer()); %>