diff --git a/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql new file mode 100644 index 00000000..e8ecb041 --- /dev/null +++ b/panoramapublic/resources/schemas/dbscripts/postgresql/panoramapublic-25.003-25.004.sql @@ -0,0 +1,7 @@ + +-- Add publication tracking columns to DatasetStatus +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PotentialPublicationId VARCHAR(255); +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationType VARCHAR(50); +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN PublicationMatchInfo TEXT; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN Citation TEXT; +ALTER TABLE panoramapublic.DatasetStatus ADD COLUMN UserDismissedPublication TIMESTAMP; diff --git a/panoramapublic/resources/schemas/panoramapublic.xml b/panoramapublic/resources/schemas/panoramapublic.xml index 6b3acf79..e2753337 100644 --- a/panoramapublic/resources/schemas/panoramapublic.xml +++ b/panoramapublic/resources/schemas/panoramapublic.xml @@ -864,6 +864,21 @@ + + Publication ID (PubMed ID, PMC ID) suggestion based on NCBI search + + + Type of publication ID: PMID, PMC + + + Information about which fields (e.g. PXD, Panorama link, title etc.) 
matched + + + NLM citation string for the potential publication match + + + Date when the user dismissed the publication suggestion for this dataset + \ No newline at end of file diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java index 1a278a0b..2c550553 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicController.java @@ -155,6 +155,11 @@ import org.labkey.panoramapublic.datacite.DoiMetadata; import org.labkey.panoramapublic.message.PrivateDataMessageScheduler; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.MockNcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchServiceImpl; +import org.labkey.panoramapublic.ncbi.PublicationMatch; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; import org.labkey.panoramapublic.model.CatalogEntry; import org.labkey.panoramapublic.model.DataLicense; import org.labkey.panoramapublic.model.DatasetStatus; @@ -262,7 +267,7 @@ import static org.labkey.api.util.DOM.Attribute.width; import static org.labkey.api.util.DOM.LK.ERRORS; import static org.labkey.api.util.DOM.LK.FORM; -import static org.labkey.panoramapublic.proteomexchange.NcbiUtils.PUBMED_ID; +import static org.labkey.panoramapublic.ncbi.NcbiConstants.PUBMED_ID; /** * User: vsharma @@ -6946,7 +6951,6 @@ public ModelAndView getView(PublicationDetailsForm form, boolean reshow, BindExc return new SimpleErrorView(errors); } - form.setPubmedId(_copiedExperiment.getPubmedId()); form.setLink(_copiedExperiment.getPublicationLink()); form.setCitation(_copiedExperiment.getCitation()); return getPublicationDetailsView(form, errors); @@ -7045,7 +7049,7 @@ public boolean handlePost(PublicationDetailsForm 
form, BindException errors) { if (form.hasPubmedId()) { - Pair linkAndCitation = NcbiUtils.getLinkAndCitation(form.getPubmedId()); + Pair linkAndCitation = NcbiPublicationSearchService.get().getPubMedLinkAndCitation(form.getPubmedId()); if (linkAndCitation != null) { form.setLink(linkAndCitation.first); @@ -10113,6 +10117,14 @@ else if (form.getExtensionLength() < 0) { errors.reject(ERROR_MSG, "Value for 'Extension duration' cannot be less than 0."); } + if (form.getPublicationSearchFrequency() == null) + { + errors.reject(ERROR_MSG, "Please enter a value for 'Publication search frequency'."); + } + else if (form.getPublicationSearchFrequency() < 1) + { + errors.reject(ERROR_MSG, "Value for 'Publication search frequency' must be at least 1."); + } if (form.getReminderTime() == null) { errors.reject(ERROR_MSG, "Please enter a value for 'Reminder time'."); @@ -10135,6 +10147,8 @@ public ModelAndView getView(PrivateDataReminderSettingsForm form, boolean reshow form.setDelayUntilFirstReminder(settings.getDelayUntilFirstReminder()); form.setReminderFrequency(settings.getReminderFrequency()); form.setExtensionLength(settings.getExtensionLength()); + form.setEnablePublicationSearch(settings.isEnablePublicationSearch()); + form.setPublicationSearchFrequency(settings.getPublicationSearchFrequency()); } VBox view = new VBox(); @@ -10153,6 +10167,8 @@ public boolean handlePost(PrivateDataReminderSettingsForm form, BindException er settings.setDelayUntilFirstReminder(form.getDelayUntilFirstReminder()); settings.setReminderFrequency(form.getReminderFrequency()); settings.setExtensionLength(form.getExtensionLength()); + settings.setEnablePublicationSearch(form.isEnablePublicationSearch()); + settings.setPublicationSearchFrequency(form.getPublicationSearchFrequency()); PrivateDataReminderSettings.save(settings); PrivateDataMessageScheduler.getInstance().initialize(settings.isEnableReminders()); @@ -10190,6 +10206,8 @@ public static class PrivateDataReminderSettingsForm private 
Integer _extensionLength; private Integer _reminderFrequency; private Integer _delayUntilFirstReminder; + private boolean _enablePublicationSearch; + private Integer _publicationSearchFrequency; public boolean isEnabled() { @@ -10240,6 +10258,26 @@ public void setDelayUntilFirstReminder(Integer delayUntilFirstReminder) { _delayUntilFirstReminder = delayUntilFirstReminder; } + + public boolean isEnablePublicationSearch() + { + return _enablePublicationSearch; + } + + public void setEnablePublicationSearch(boolean enablePublicationSearch) + { + _enablePublicationSearch = enablePublicationSearch; + } + + public Integer getPublicationSearchFrequency() + { + return _publicationSearchFrequency; + } + + public void setPublicationSearchFrequency(Integer publicationSearchFrequency) + { + _publicationSearchFrequency = publicationSearchFrequency; + } } @RequiresPermission(AdminOperationsPermission.class) @@ -10258,15 +10296,15 @@ public ModelAndView getView(PrivateDataSendReminderForm form, boolean reshow, Bi return new SimpleErrorView(errors, true); } - QuerySettings qSettings = new QuerySettings(getViewContext(), PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, - PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); - qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); - qSettings.setBaseFilter(new SimpleFilter(FieldKey.fromParts("Public"), "No")); + // Initialize form with current settings on first view + if (!reshow) + { + // Check the "Search for Publications" box if enabled in the saved settings + PrivateDataReminderSettings settings = PrivateDataReminderSettings.get(); + form.setSearchPublications(settings.isEnablePublicationSearch()); + } - QueryView tableView = new QueryView(new PanoramaPublicSchema(getUser(), getContainer()), qSettings, null); - tableView.setTitle("Private Panorama Private Datasets"); - tableView.setFrame(WebPartView.FrameType.NONE); - tableView.disableContainerFilterSelection(); + QueryView tableView = 
getPrivateExperimentsQueryView(getViewContext(), getContainer(), getUser()); form.setDataRegionName(tableView.getDataRegionName()); @@ -10290,7 +10328,8 @@ public boolean handlePost(PrivateDataSendReminderForm form, BindException errors PipelineService.get().getPipelineRootSetting(getContainer()), JournalManager.getJournal(getContainer()), form.getSelectedExperimentIds(), - form.getTestMode()); + form.getTestMode(), + form.isSearchPublications()); PipelineService.get().queueJob(job); return true; } @@ -10310,9 +10349,24 @@ public void addNavTrail(NavTree root) } } + private static @NotNull QueryView getPrivateExperimentsQueryView(ViewContext viewContext, Container container, User user) + { + QuerySettings qSettings = new QuerySettings(viewContext, PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS, + PanoramaPublicSchema.TABLE_EXPERIMENT_ANNOTATIONS); + qSettings.setContainerFilterName(ContainerFilter.Type.CurrentAndSubfolders.name()); + qSettings.setBaseFilter(new SimpleFilter(FieldKey.fromParts("Public"), "No")); + + QueryView tableView = new QueryView(new PanoramaPublicSchema(user, container), qSettings, null); + tableView.setTitle("Private Panorama Public Datasets"); + tableView.setFrame(WebPartView.FrameType.NONE); + tableView.disableContainerFilterSelection(); + return tableView; + } + public static class PrivateDataSendReminderForm { private boolean _testMode; + private boolean _searchPublications; private String _selectedIds; private String _dataRegionName = null; @@ -10326,6 +10380,16 @@ public void setTestMode(boolean testMode) _testMode = testMode; } + public boolean isSearchPublications() + { + return _searchPublications; + } + + public void setSearchPublications(boolean searchPublications) + { + _searchPublications = searchPublications; + } + public String getSelectedIds() { return _selectedIds; @@ -10356,8 +10420,56 @@ public void setDataRegionName(String dataRegionName) } } + @RequiresPermission(AdminOperationsPermission.class) + public static class 
SearchPublicationsAction extends SimpleViewAction + { + @Override + public ModelAndView getView(Object form, BindException errors) + { + Journal panoramaPublic = JournalManager.getJournal(getContainer()); + if (panoramaPublic == null) + { + errors.reject(ERROR_MSG, "Not a Panorama Public folder: " + getContainer().getName()); + return new SimpleErrorView(errors, true); + } + + QueryView tableView = getPrivateExperimentsQueryView(getViewContext(), getContainer(), getUser()); + + SearchPublicationsBean bean = new SearchPublicationsBean(); + bean.setDataRegionName(tableView.getDataRegionName()); + + JspView jspView = new JspView<>("/org/labkey/panoramapublic/view/searchPublicationsForm.jsp", bean, errors); + VBox view = new VBox(jspView, tableView); + view.setTitle("Search Publications"); + view.setFrame(WebPartView.FrameType.PORTAL); + return view; + } + + @Override + public void addNavTrail(NavTree root) + { + root.addChild("Private Data Reminder Settings", new ActionURL(PrivateDataReminderSettingsAction.class, ContainerManager.getRoot())); + root.addChild("Search Publications"); + } + } + + public static class SearchPublicationsBean + { + private String _dataRegionName; + + public String getDataRegionName() + { + return _dataRegionName; + } + + public void setDataRegionName(String dataRegionName) + { + _dataRegionName = dataRegionName; + } + } + @RequiresAnyOf({AdminPermission.class, PanoramaPublicSubmitterPermission.class}) - public abstract class UpdateDatasetStatusAction extends ConfirmAction + public abstract static class UpdateDatasetStatusAction extends ConfirmAction { protected ExperimentAnnotations _exptAnnotations; protected DatasetStatus _datasetStatus; @@ -10366,13 +10478,15 @@ public abstract class UpdateDatasetStatusAction extends ConfirmAction + { + private ExperimentAnnotations _exptAnnotations; + + @Override + public ModelAndView getView(ExperimentIdForm form, BindException errors) + { + _exptAnnotations = getValidExperiment(form, getContainer(), 
getViewContext(), errors); + if (_exptAnnotations == null) + { + return new SimpleErrorView(errors); + } + + List matches; + try + { + matches = NcbiPublicationSearchService.get().searchForPublication(_exptAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, true); + } + catch (Exception e) + { + LOG.error("Error searching for publications for experiment " + _exptAnnotations.getId(), e); + errors.reject(ERROR_MSG, "Error searching for publications: " + e.getMessage()); + return new SimpleErrorView(errors); + } + + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(_exptAnnotations); + + // Check if any displayed match was dismissed by the user + + String dismissedPubId = null; + if (datasetStatus != null) + { + dismissedPubId = matches.stream() + .map(PublicationMatch::getPublicationId) + .filter(datasetStatus::isPublicationDismissed) + .findFirst() + .orElse(null); + } + boolean showDismissedColumn = dismissedPubId != null; + + SearchPublicationsForDatasetBean bean = new SearchPublicationsForDatasetBean(_exptAnnotations, matches, showDismissedColumn, dismissedPubId); + JspView jspView = new JspView<>("/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp", bean, errors); + jspView.setFrame(WebPartView.FrameType.PORTAL); + jspView.setTitle("Publication Matches for Dataset"); + return jspView; + } + + @Override + public void addNavTrail(NavTree root) + { + root.addChild("Publication Matches for Dataset"); + } + } + + public static class SearchPublicationsForDatasetBean + { + private final ExperimentAnnotations _experimentAnnotations; + private final List _matches; + private final boolean _showDismissedColumn; + private final String _dismissedPubId; + + public SearchPublicationsForDatasetBean(ExperimentAnnotations experimentAnnotations, List matches, + boolean showDismissedColumn, String dismissedPubId) + { + _experimentAnnotations = experimentAnnotations; + _matches = matches; + _showDismissedColumn = showDismissedColumn; + 
_dismissedPubId = dismissedPubId; + } + + public ExperimentAnnotations getExperimentAnnotations() + { + return _experimentAnnotations; + } + + public List getMatches() + { + return _matches; + } + + public boolean isShowDismissedColumn() + { + return _showDismissedColumn; + } + + public String getDismissedPubId() + { + return _dismissedPubId; + } + } + + @RequiresPermission(AdminOperationsPermission.class) + public static class SearchPublicationsForDatasetApiAction extends ReadOnlyApiAction + { + @Override + public Object execute(ExperimentIdForm form, BindException errors) + { + ApiSimpleResponse response = new ApiSimpleResponse(); + ExperimentAnnotations expAnnotations = form.lookupExperiment(); + if (expAnnotations == null) + { + response.put("success", false); + response.put("error", "No experiment found for Id " + form.getId()); + return response; + } + + try + { + List matches = NcbiPublicationSearchService.get().searchForPublication(expAnnotations, NcbiPublicationSearchService.MAX_RESULTS, null, false); + response.put("success", true); + response.put("papersFound", matches.size()); + + List matchList = new ArrayList<>(); + for (PublicationMatch match : matches) + { + matchList.add(match.toJson()); + } + response.put("matches", matchList); + } + catch (Exception e) + { + LOG.error("Error searching for publications for experiment " + form.getId(), e); + response.put("success", false); + response.put("error", "Error searching for publications: " + e.getMessage()); + } + + return response; + } + } + + @RequiresPermission(AdminOperationsPermission.class) + public static class NotifySubmitterOfPublicationAction extends FormHandlerAction + { + private Container _announcementsContainer; + private Announcement _announcement; + + @Override + public void validateCommand(NotifySubmitterForm form, Errors errors) + { + if (form.getId() <= 0) + { + errors.reject(ERROR_MSG, "Missing experiment annotations ID"); + } + if (StringUtils.isBlank(form.getPublicationId())) + { + 
errors.reject(ERROR_MSG, "No publication was selected"); + } + } + + @Override + public boolean handlePost(NotifySubmitterForm form, BindException errors) throws Exception + { + ExperimentAnnotations exptAnnotations = ExperimentAnnotationsManager.get(form.getId()); + if (exptAnnotations == null) + { + errors.reject(ERROR_MSG, "No experiment found for Id " + form.getId()); + return false; + } + + // Check if the user has already dismissed this publication suggestion + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(exptAnnotations); + if (datasetStatus != null && datasetStatus.isPublicationDismissed(form.getPublicationId())) + { + errors.reject(ERROR_MSG, "The user has already dismissed the publication suggestion " + + datasetStatus.getPublicationIdLabel() + + " for this dataset."); + return false; + } + + DB pubType = DB.fromString(form.getPublicationType()); + if (pubType == null) + { + errors.reject(ERROR_MSG, "Invalid publication type: " + form.getPublicationType()); + return false; + } + + // Reconstruct PublicationMatch from form fields + String citation = NcbiPublicationSearchService.get().getCitation(form.getPublicationId(), pubType); + PublicationMatch selectedMatch = PublicationMatch.fromMatchInfo(form.getPublicationId(), pubType, form.getMatchInfo(), citation); + + // Post notification + JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(exptAnnotations); + if (submission == null) + { + errors.reject(ERROR_MSG, "No journal submission found for experiment " + exptAnnotations.getId()); + return false; + } + Journal journal = JournalManager.getJournal(submission.getJournalId()); + if (journal == null) + { + errors.reject(ERROR_MSG, "No journal found for submission"); + return false; + } + + _announcementsContainer = journal.getSupportContainer(); + _announcement = submission.getAnnouncement(AnnouncementService.get(), _announcementsContainer, getUser()); + if (_announcement == null) + { + errors.reject(ERROR_MSG, 
"No support thread found for this submission"); + return false; + } + + User submitter = exptAnnotations.getSubmitterUser(); + if (submitter == null) + { + errors.reject(ERROR_MSG, "Could not find submitter for experiment " + exptAnnotations.getId()); + return false; + } + + List notifyUsers = new ArrayList<>(); + notifyUsers.add(submitter); + if (exptAnnotations.getLabHeadUser() != null) + { + notifyUsers.add(exptAnnotations.getLabHeadUser()); + } + + PanoramaPublicNotification.postPrivateDataReminderMessage( + journal, submission, exptAnnotations, submitter, getUser(), notifyUsers, + _announcement, _announcementsContainer, getUser(), selectedMatch); + + // Update DatasetStatus + if (datasetStatus == null) + { + datasetStatus = new DatasetStatus(); + datasetStatus.setExperimentAnnotationsId(exptAnnotations.getId()); + } + datasetStatus.setPotentialPublicationId(form.getPublicationId()); + datasetStatus.setPublicationType(pubType.name()); + datasetStatus.setPublicationMatchInfo(form.getMatchInfo()); + datasetStatus.setCitation(selectedMatch.getCitation()); + datasetStatus.setUserDismissedPublication(null); + datasetStatus.setLastReminderDate(new Date()); + + if (datasetStatus.getId() == 0) + { + DatasetStatusManager.save(datasetStatus, getUser()); + } + else + { + DatasetStatusManager.update(datasetStatus, getUser()); + } + + return true; + } + + @Override + public ActionURL getSuccessURL(NotifySubmitterForm form) + { + return new ActionURL("announcements", "thread", _announcementsContainer) + .addParameter("rowId", _announcement.getRowId()); + } + } + + public static class NotifySubmitterForm extends IdForm + { + private String _publicationId; + private String _publicationType; + private String _matchInfo; + + public String getPublicationId() + { + return _publicationId; + } + + public void setPublicationId(String publicationId) + { + _publicationId = publicationId; + } + + public String getPublicationType() + { + return _publicationType; + } + + public void 
setPublicationType(String publicationType) + { + _publicationType = publicationType; + } + + public String getMatchInfo() + { + return _matchInfo; + } + + public void setMatchInfo(String matchInfo) + { + _matchInfo = matchInfo; + } + + } + private static ExperimentAnnotations getValidExperimentAnnotations(ShortUrlForm shortUrlForm, Errors errors) { String shortUrlEntityId = shortUrlForm.getShortUrlEntityId(); @@ -10719,6 +11216,94 @@ public static ActionURL getCatalogImageDownloadUrl(ExperimentAnnotations expAnno .addParameter("name", filename); } + // ======================== Support actions for Selenium tests ======================== + + @RequiresSiteAdmin + public static class SetupMockNcbiServiceAction extends ReadOnlyApiAction + { + @Override + public Object execute(Object form, BindException errors) + { + NcbiPublicationSearchServiceImpl.setInstance(new MockNcbiPublicationSearchService()); + return new ApiSimpleResponse("mock", true); + } + } + + @RequiresSiteAdmin + public static class RestoreNcbiServiceAction extends ReadOnlyApiAction + { + @Override + public Object execute(Object form, BindException errors) + { + NcbiPublicationSearchServiceImpl.setInstance(new NcbiPublicationSearchServiceImpl()); + return new ApiSimpleResponse("restored", true); + } + } + + @RequiresSiteAdmin + public static class RegisterMockPublicationAction extends ReadOnlyApiAction + { + @Override + public Object execute(RegisterMockPublicationForm form, BindException errors) + { + NcbiPublicationSearchService service = NcbiPublicationSearchServiceImpl.getInstance(); + if (!(service instanceof MockNcbiPublicationSearchService mock)) + { + errors.reject(ERROR_MSG, "Mock NCBI service is not available. 
Call setupMockNcbiService first."); + return null; + } + mock.register(form.getDatabase(), form.getId(), form.getSearchKey(), + form.getPmid(), form.getTitle(), form.getAuthors(), + form.getPubDate(), form.getSource(), form.getJournalFull(), + form.getCitation()); + return new ApiSimpleResponse("registered", true); + } + } + + public static class RegisterMockPublicationForm + { + private String _database; + private String _id; + private String _searchKey; + private String _pmid; + private String _title; + private String _authors; + private String _pubDate; + private String _source; + private String _journalFull; + private String _citation; + + public String getDatabase() { return _database; } + public void setDatabase(String database) { _database = database; } + + public String getId() { return _id; } + public void setId(String id) { _id = id; } + + public String getSearchKey() { return _searchKey; } + public void setSearchKey(String searchKey) { _searchKey = searchKey; } + + public String getPmid() { return _pmid; } + public void setPmid(String pmid) { _pmid = pmid; } + + public String getTitle() { return _title; } + public void setTitle(String title) { _title = title; } + + public String getAuthors() { return _authors; } + public void setAuthors(String authors) { _authors = authors; } + + public String getPubDate() { return _pubDate; } + public void setPubDate(String pubDate) { _pubDate = pubDate; } + + public String getSource() { return _source; } + public void setSource(String source) { _source = source; } + + public String getJournalFull() { return _journalFull; } + public void setJournalFull(String journalFull) { _journalFull = journalFull; } + + public String getCitation() { return _citation; } + public void setCitation(String citation) { _citation = citation; } + } + public static class TestCase extends AbstractActionPermissionTest { @Override diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java 
b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java index cc5aca7e..1e18391a 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicModule.java @@ -47,6 +47,7 @@ import org.labkey.panoramapublic.bluesky.PanoramaPublicLogoResourceType; import org.labkey.panoramapublic.catalog.CatalogImageAttachmentType; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchServiceImpl; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.speclib.SpecLibKey; import org.labkey.panoramapublic.pipeline.CopyExperimentPipelineProvider; @@ -92,7 +93,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 25.003; + return 25.004; } @Override @@ -382,6 +383,7 @@ public Set getSchemaNames() set.add(CatalogEntryManager.TestCase.class); set.add(BlueskyApiClient.TestCase.class); set.add(PrivateDataReminderSettings.TestCase.class); + set.add(NcbiPublicationSearchServiceImpl.TestCase.class); return set; } diff --git a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java index b8633b12..a775d148 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java +++ b/panoramapublic/src/org/labkey/panoramapublic/PanoramaPublicNotification.java @@ -28,6 +28,7 @@ import org.labkey.panoramapublic.model.JournalExperiment; import org.labkey.panoramapublic.model.JournalSubmission; import org.labkey.panoramapublic.model.Submission; +import org.labkey.panoramapublic.ncbi.PublicationMatch; import org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; @@ -358,22 +359,63 @@ public static void 
postDataDeletionRequestMessage(@NotNull Journal journal, @Not postNotificationFullTitle(journal, je, messageBody.toString(), journalAdmin, messageTitle, AnnouncementService.StatusOption.Active, null); } + public static void postPublicationDismissalMessage(@NotNull Journal journal, @NotNull JournalExperiment je, @NotNull ExperimentAnnotations expAnnotations, User submitter, + @NotNull PublicationMatch publicationMatch) + { + User journalAdmin = JournalManager.getJournalAdminUser(journal); + if (journalAdmin == null) + { + throw new NotFoundException(String.format("Could not find an admin user for %s.", journal.getName())); + } + + String messageTitle = "Publication Suggestion Dismissed" +" - " + expAnnotations.getShortUrl().renderShortURL(); + + StringBuilder messageBody = new StringBuilder(); + messageBody.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); + messageBody.append("Thank you for letting us know that the suggested paper is not associated with your data on Panorama Public."); + + messageBody.append(NL2).append(bold("Dismissed Publication:")).append(" ").append(publicationMatch.getPublicationIdLabel()); + if (!StringUtils.isBlank(publicationMatch.getCitation())) + { + messageBody.append(NL).append(publicationMatch.getCitation()); + } + + messageBody.append(NL2).append("We will no longer suggest this paper for your dataset. 
") + .append("If you would like to make your data public, you can do so at any time ") + .append("by clicking the \"Make Public\" button in your data folder, or by clicking this link: ") + .append(bold(link("Make Data Public", PanoramaPublicController.getMakePublicUrl(expAnnotations.getId(), expAnnotations.getContainer()).getURIString()))) + .append("."); + messageBody.append(NL2).append("Best regards,"); + messageBody.append(NL).append(getUserName(journalAdmin)); + + postNotificationFullTitle(journal, je, messageBody.toString(), journalAdmin, messageTitle, AnnouncementService.StatusOption.Closed, null); + } + public static void postPrivateDataReminderMessage(@NotNull Journal journal, @NotNull JournalSubmission js, @NotNull ExperimentAnnotations expAnnotations, @NotNull User submitter, @NotNull User messagePoster, List notifyUsers, - @NotNull Announcement announcement, @NotNull Container announcementsContainer, @NotNull User journalAdmin) + @NotNull Announcement announcement, @NotNull Container announcementsContainer, @NotNull User journalAdmin, + @Nullable PublicationMatch articleMatch) { - String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin); - String title = "Action Required: Status Update for Your Private Data on Panorama Public"; + String message = getDataStatusReminderMessage(expAnnotations, submitter, js, announcement, announcementsContainer, journalAdmin, articleMatch); + String title = articleMatch != null + ? 
"Action Required: Publication Found for Your Data on Panorama Public" + : "Action Required: Status Update for Your Private Data on Panorama Public"; postNotificationFullTitle(journal, js.getJournalExperiment(), message, messagePoster, title, AnnouncementService.StatusOption.Closed, notifyUsers); } public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations exptAnnotations, @NotNull User submitter, @NotNull JournalSubmission js,@NotNull Announcement announcement, - @NotNull Container announcementContainer, @NotNull User journalAdmin) + @NotNull Container announcementContainer, @NotNull User journalAdmin, + @Nullable PublicationMatch articleMatch) { String shortUrl = exptAnnotations.getShortUrl().renderShortURL(); - String makePublicLink = PanoramaPublicController.getMakePublicUrl(exptAnnotations.getId(), exptAnnotations.getContainer()).getURIString(); + ActionURL makePublicUrl = PanoramaPublicController.getMakePublicUrl(exptAnnotations.getId(), exptAnnotations.getContainer()); + if (articleMatch != null && articleMatch.isPubMed() && articleMatch.getPublicationId() != null) + { + makePublicUrl.replaceParameter("pubmedId", articleMatch.getPublicationId()); + } + String makePublicLink = makePublicUrl.getURIString(); String dateString = DateUtil.formatDateTime(js.getLatestSubmission().getCreated(), PrivateDataReminderSettings.DATE_FORMAT_PATTERN); ActionURL viewMessageUrl = new ActionURL("announcements", "thread", announcementContainer) @@ -386,32 +428,55 @@ public static String getDataStatusReminderMessage(@NotNull ExperimentAnnotations ActionURL requestExtensionUrl = new ActionURL(PanoramaPublicController.RequestExtensionAction.class, exptAnnotations.getContainer()) .addParameter("shortUrlEntityId", shortUrlEntityId); - ActionURL requesDeletionUrl = new ActionURL(PanoramaPublicController.RequestDeletionAction.class, exptAnnotations.getContainer()) + ActionURL requestDeletionUrl = new ActionURL(PanoramaPublicController.RequestDeletionAction.class, 
exptAnnotations.getContainer()) .addParameter("shortUrlEntityId",shortUrlEntityId); + ActionURL dismissPublicationUrl = new ActionURL(PanoramaPublicController.DismissPublicationSuggestionAction.class, exptAnnotations.getContainer()) + .addParameter("shortUrlEntityId", shortUrlEntityId); ExperimentAnnotations sourceExperiment = ExperimentAnnotationsManager.get(exptAnnotations.getSourceExperimentId()); StringBuilder message = new StringBuilder(); - message.append("Dear ").append(getUserName(submitter)).append(",").append(NL2) - .append("We are reaching out regarding your data on Panorama Public (").append(shortUrl).append("), which has been private since ") - .append(dateString).append(".") - .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) - .append(NL2).append(bold("Is the paper associated with this work already published?")) - .append(NL).append("- If yes: Please make your data public by clicking the \"Make Public\" button in your folder or by clicking this link: ") - .append(bold(link("Make Data Public", makePublicLink))) - .append(". This helps ensure that your valuable research is easily accessible to the community.") - .append(NL).append("- If not: You have a couple of options:") - .append(NL).append(" - ").append(bold("Request an Extension")).append(" - If your paper is still under review, or you need additional time, please let us know by clicking ") - .append(bold(link("Request Extension", requestExtensionUrl.getURIString()))).append(".") - .append(NL).append(" - ").append(bold("Delete from Panorama Public")).append(" - If you no longer wish to host your data on Panorama Public, please click ") - .append(bold(link("Request Deletion", requesDeletionUrl.getURIString()))).append(". 
") - .append("We will remove your data from Panorama Public."); - if (sourceExperiment != null) + message.append("Dear ").append(getUserName(submitter)).append(",").append(NL2); + + if (articleMatch != null) + { + // Message variant when a publication was found + message.append("We found a paper that appears to be associated with your private data on Panorama Public (").append(shortUrl).append(").") + .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) + .append(NL2).append(bold("Publication Found:")).append(" ") + .append(!StringUtils.isBlank(articleMatch.getCitation()) /* null or blank citation: fall back to the publication ID label so the link text is never empty */ + ? link(articleMatch.getCitation(), articleMatch.getPublicationUrl()) + : link(articleMatch.getPublicationIdLabel(), articleMatch.getPublicationUrl())) + .append(NL2).append("If this is indeed your paper, congratulations! We encourage you to make your data public so the research community can access it alongside your paper. ") + .append("You can do this by clicking the \"Make Public\" button in your data folder or by clicking this link: ") + .append(bold(link("Make Data Public", makePublicLink))).append(".") + .append(articleMatch.isPubMed() ? " Please enter " + articleMatch.getPublicationId() + " in the PubMed ID field." 
: "") + .append(NL2).append("If this paper is not associated with your data please let us know by clicking ") + .append(bold(link("Dismiss Publication Suggestion", dismissPublicationUrl.getURIString()))); + } + else { - message.append(" However, your source folder (") - .append(getContainerLink(sourceExperiment.getContainer())) - .append(") will remain intact, allowing you to resubmit your data in the future if you wish."); + // Original message variant when no publication was found + message.append("We are reaching out regarding your data on Panorama Public (").append(shortUrl).append("), which has been private since ") + .append(dateString).append(".") + .append(NL2).append(bold("Title:")).append(" ").append(escape(exptAnnotations.getTitle())) + .append(NL2).append(bold("Is the paper associated with this work already published?")) + .append(NL).append("- If yes: Please make your data public by clicking the \"Make Public\" button in your folder or by clicking this link: ") + .append(bold(link("Make Data Public", makePublicLink))) + .append(". This helps ensure that your valuable research is easily accessible to the community.") + .append(NL).append("- If not: You have a couple of options:") + .append(NL).append(" - ").append(bold("Request an Extension")).append(" - If your paper is still under review, or you need additional time, please let us know by clicking ") + .append(bold(link("Request Extension", requestExtensionUrl.getURIString()))).append(".") + .append(NL).append(" - ").append(bold("Delete from Panorama Public")).append(" - If you no longer wish to host your data on Panorama Public, please click ") + .append(bold(link("Request Deletion", requestDeletionUrl.getURIString()))).append(". 
") + .append("We will remove your data from Panorama Public."); + if (sourceExperiment != null) + { + message.append(" However, your source folder (") + .append(getContainerLink(sourceExperiment.getContainer())) + .append(") will remain intact, allowing you to resubmit your data in the future if you wish."); + } } message.append(NL2).append("If you have any questions or need further assistance, please do not hesitate to respond to this message by ") diff --git a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java index ee45663c..4837c071 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java +++ b/panoramapublic/src/org/labkey/panoramapublic/message/PrivateDataReminderSettings.java @@ -24,12 +24,16 @@ public class PrivateDataReminderSettings public static final String PROP_DELAY_UNTIL_FIRST_REMINDER = "Delay until first reminder (months)"; public static final String PROP_REMINDER_FREQUENCY = "Reminder frequency (months)"; public static final String PROP_EXTENSION_LENGTH = "Extension duration (months)"; + public static final String PROP_ENABLE_PUBLICATION_SEARCH = "Enable publication search"; + public static final String PROP_PUBLICATION_SEARCH_FREQUENCY = "Publication search frequency (months)"; private static final boolean DEFAULT_ENABLE_REMINDERS = false; public static final String DEFAULT_REMINDER_TIME = "8:00 AM"; private static final int DEFAULT_DELAY_UNTIL_FIRST_REMINDER = 12; // Send the first reminder after the data has been private for a year. private static final int DEFAULT_REMINDER_FREQUENCY = 1; // Send reminders once a month, unless extension or deletion was requested. private static final int DEFAULT_EXTENSION_LENGTH = 6; // Private status of a dataset can be extended by 6 months. 
+ private static final boolean DEFAULT_ENABLE_PUBLICATION_SEARCH = false; + private static final int DEFAULT_PUBLICATION_SEARCH_FREQUENCY = 3; // Re-search every 3 months after dismissal public static final String DATE_FORMAT_PATTERN = "MMMM d, yyyy"; public static final String REMINDER_TIME_FORMAT = "h:mm a"; @@ -40,6 +44,8 @@ public class PrivateDataReminderSettings private int _delayUntilFirstReminder; private int _reminderFrequency; private int _extensionLength; + private boolean _enablePublicationSearch; + private int _publicationSearchFrequency; public static PrivateDataReminderSettings get() { @@ -70,6 +76,16 @@ public static PrivateDataReminderSettings get() LocalTime reminderTime = tryParseReminderTime(settingsMap.get(PROP_REMINDER_TIME), DEFAULT_REMINDER_TIME); settings.setReminderTime(reminderTime); + + boolean enablePublicationCheck = settingsMap.get(PROP_ENABLE_PUBLICATION_SEARCH) == null + ? DEFAULT_ENABLE_PUBLICATION_SEARCH + : Boolean.valueOf(settingsMap.get(PROP_ENABLE_PUBLICATION_SEARCH)); + settings.setEnablePublicationSearch(enablePublicationCheck); + + int publicationSearchFrequency = settingsMap.get(PROP_PUBLICATION_SEARCH_FREQUENCY) == null + ? 
DEFAULT_PUBLICATION_SEARCH_FREQUENCY + : Integer.valueOf(settingsMap.get(PROP_PUBLICATION_SEARCH_FREQUENCY)); + settings.setPublicationSearchFrequency(publicationSearchFrequency); } else { @@ -78,6 +94,8 @@ public static PrivateDataReminderSettings get() settings.setReminderFrequency(DEFAULT_REMINDER_FREQUENCY); settings.setExtensionLength(DEFAULT_EXTENSION_LENGTH); settings.setReminderTime(parseReminderTime(DEFAULT_REMINDER_TIME)); + settings.setEnablePublicationSearch(DEFAULT_ENABLE_PUBLICATION_SEARCH); + settings.setPublicationSearchFrequency(DEFAULT_PUBLICATION_SEARCH_FREQUENCY); } return settings; @@ -112,6 +130,8 @@ public static void save(PrivateDataReminderSettings settings) settingsMap.put(PROP_REMINDER_FREQUENCY, String.valueOf(settings.getReminderFrequency())); settingsMap.put(PROP_EXTENSION_LENGTH, String.valueOf(settings.getExtensionLength())); settingsMap.put(PROP_REMINDER_TIME, settings.getReminderTimeFormatted()); + settingsMap.put(PROP_ENABLE_PUBLICATION_SEARCH, String.valueOf(settings.isEnablePublicationSearch())); + settingsMap.put(PROP_PUBLICATION_SEARCH_FREQUENCY, String.valueOf(settings.getPublicationSearchFrequency())); settingsMap.save(); } @@ -170,6 +190,26 @@ public void setDelayUntilFirstReminder(int delayUntilFirstReminder) _delayUntilFirstReminder = delayUntilFirstReminder; } + public boolean isEnablePublicationSearch() + { + return _enablePublicationSearch; + } + + public void setEnablePublicationSearch(boolean enablePublicationSearch) + { + _enablePublicationSearch = enablePublicationSearch; + } + + public int getPublicationSearchFrequency() + { + return _publicationSearchFrequency; + } + + public void setPublicationSearchFrequency(int publicationSearchFrequency) + { + _publicationSearchFrequency = publicationSearchFrequency; + } + public @Nullable Date getReminderValidUntilDate(@NotNull DatasetStatus status) { return status.getLastReminderDate() == null ? 
null : addMonths(status.getLastReminderDate(), getReminderFrequency()); @@ -190,6 +230,14 @@ public boolean isExtensionValid(@NotNull DatasetStatus status) return isDateInFuture(getExtensionValidUntilDate(status)); } + public boolean isPublicationDismissalRecent(@NotNull DatasetStatus status) + { + Date dismissed = status.getUserDismissedPublication(); + if (dismissed == null) return false; + Date searchDeferralEnd = addMonths(dismissed, getPublicationSearchFrequency()); + return isDateInFuture(searchDeferralEnd); + } + public @Nullable String extensionValidUntilFormatted(@NotNull DatasetStatus status) { Date date = getExtensionValidUntilDate(status); diff --git a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java index 717da27d..1a295efc 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java +++ b/panoramapublic/src/org/labkey/panoramapublic/model/DatasetStatus.java @@ -1,17 +1,23 @@ package org.labkey.panoramapublic.model; import org.jetbrains.annotations.Nullable; -import org.labkey.api.view.ShortURLRecord; import org.labkey.panoramapublic.message.PrivateDataReminderSettings; +import org.labkey.panoramapublic.ncbi.NcbiConstants; import java.util.Date; public class DatasetStatus extends DbEntity { + private int _experimentAnnotationsId; private Date _lastReminderDate; private Date _extensionRequestedDate; private Date _deletionRequestedDate; + private String _potentialPublicationId; + private String _publicationType; + private String _publicationMatchInfo; + private String _citation; + private Date _userDismissedPublication; public int getExperimentAnnotationsId() { @@ -72,4 +78,66 @@ public boolean reminderSent() { return _lastReminderDate != null; } + + public String getPotentialPublicationId() + { + return _potentialPublicationId; + } + + public void setPotentialPublicationId(String potentialPublicationId) + { + _potentialPublicationId = 
potentialPublicationId; + } + + public String getPublicationType() + { + return _publicationType; + } + + public void setPublicationType(String publicationType) + { + _publicationType = publicationType; + } + + public String getPublicationIdLabel() + { + NcbiConstants.DB type = NcbiConstants.DB.fromString(_publicationType); + String label = type == null ? (_publicationType != null ? _publicationType : "") : type.name(); + return label + " ID " + getPotentialPublicationId(); + } + + public String getPublicationMatchInfo() + { + return _publicationMatchInfo; + } + + public void setPublicationMatchInfo(String publicationMatchInfo) + { + _publicationMatchInfo = publicationMatchInfo; + } + + public String getCitation() + { + return _citation; + } + + public void setCitation(String citation) + { + _citation = citation; + } + + public Date getUserDismissedPublication() + { + return _userDismissedPublication; + } + + public void setUserDismissedPublication(Date userDismissedPublication) + { + _userDismissedPublication = userDismissedPublication; + } + + public boolean isPublicationDismissed(String publicationId) + { + return getUserDismissedPublication() != null && publicationId != null && publicationId.equals(getPotentialPublicationId()); + } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java new file mode 100644 index 00000000..baac21a0 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/MockNcbiPublicationSearchService.java @@ -0,0 +1,201 @@ +package org.labkey.panoramapublic.ncbi; + +import org.jetbrains.annotations.Nullable; +import org.json.JSONArray; +import org.json.JSONObject; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Mock implementation of {@link NcbiPublicationSearchService} that returns canned data 
registered by tests. + * Used by Selenium tests when running on TeamCity. + * Extends {@link NcbiPublicationSearchServiceImpl} and only overrides {@link #getString(String)}, + * the single method that makes HTTP calls to NCBI. All search logic, filtering, author/title + * verification, citation parsing, and priority filtering run through the real implementation code. + * Tests register mock articles via {@link #register}, providing the database, ID, search key, + * metadata fields, and citation. The mock builds internal lookup maps from this data and returns + * appropriate responses when the real search logic calls {@code getString()}. + */ +public class MockNcbiPublicationSearchService extends NcbiPublicationSearchServiceImpl +{ + // ESearch: searchKey -> list of IDs (per database) + private final Map> _pmcSearchResults = new HashMap<>(); + private final Map> _pubmedSearchResults = new HashMap<>(); + + // ESummary: ID -> metadata JSONObject (per database) + private final Map _pmcMetadata = new HashMap<>(); + private final Map _pubmedMetadata = new HashMap<>(); + + // Citations: PMID -> citation string + private final Map _citations = new HashMap<>(); + + /** + * Register a mock article. The mock stores the data in internal lookup maps used by + * {@link #getString(String)}. + * @param database "pmc" or "pubmed" — the NCBI database this article is in + * @param id the article ID in the given database (numeric ID for pmc or pubmed) + * @param searchKey what ESearch query term finds this article (e.g. PXD ID for PMC, author last name for PubMed) + * @param pmid linked PubMed ID for PMC articles. PMC articles usually have a corresponding PMID in their + * metadata (articleids array), which the real code extracts via {@code extractPubMedId()}. + * Null for pubmed articles (where {@code id} is already the PMID). + * @param title article title + * @param authors comma-separated author list (e.g. 
"Abbatiello SE,Mani DR,Schilling B") + * @param pubDate publication date in "YYYY/MM/DD HH:MM" format, matching the NCBI ESummary + * sortpubdate (PubMed) or sortdate (PMC) field that parsePublicationDate() checks first + * @param source journal abbreviation used as ESummary "source" field (e.g. "Mol Cell Proteomics", "bioRxiv") + * @param journalFull full journal name used as ESummary "fulljournalname" field + * @param citation NLM citation string (for getCitation, keyed by PMID; null if not applicable) + */ + public void register(String database, String id, String searchKey, + @Nullable String pmid, String title, String authors, + String pubDate, String source, String journalFull, + @Nullable String citation) + { + boolean isPmc = "pmc".equalsIgnoreCase(database); + + // Build ESummary metadata + JSONObject metadata = new JSONObject(); + metadata.put("title", title); + metadata.put("sorttitle", title.toLowerCase()); + metadata.put("source", source); + metadata.put("fulljournalname", journalFull); + // Real NCBI ESummary returns sortpubdate (PubMed) or sortdate (PMC) in "YYYY/MM/DD HH:MM" format. + // parsePublicationDate() checks these first before falling back to pubdate. + metadata.put(isPmc ? 
"sortdate" : "sortpubdate", pubDate); + + // Authors array + JSONArray authorsArray = new JSONArray(); + for (String author : authors.split(",")) + { + authorsArray.put(new JSONObject().put("name", author.trim())); + } + metadata.put("authors", authorsArray); + + // For PMC articles, add articleids with PMC ID and optional PMID + if (isPmc) + { + JSONArray articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC" + id)); + if (pmid != null) + { + articleIds.put(new JSONObject().put("idtype", "pmid").put("value", pmid)); + } + metadata.put("articleids", articleIds); + } + + // Store in appropriate maps + if (isPmc) + { + _pmcSearchResults.computeIfAbsent(searchKey, k -> new ArrayList<>()).add(id); + _pmcMetadata.put(id, metadata); + } + else + { + _pubmedSearchResults.computeIfAbsent(searchKey, k -> new ArrayList<>()).add(id); + _pubmedMetadata.put(id, metadata); + } + + // Store citation keyed by PMID. + // For PMC articles, the PMID is in the pmid parameter. + // For PubMed articles, the id parameter is already the PMID. + if (citation != null) + { + String citationKey = isPmc ? pmid : id; + if (citationKey != null) + { + _citations.put(citationKey, citation); + } + } + } + + /** + * Returns canned responses for NCBI API requests based on registered mock data. + * Handles ESearch, ESummary, and Citation Exporter URLs. + */ + @Override + protected String getString(String url) throws IOException + { + if (url.contains("esearch.fcgi")) + { + return handleESearch(url).toString(); + } + else if (url.contains("esummary.fcgi")) + { + return handleESummary(url).toString(); + } + else if (url.contains("lit/ctxp")) + { + return handleCitation(url).toString(); + } + throw new IOException("MockNcbiPublicationSearchService: unexpected URL: " + url); + } + + private JSONObject handleESearch(String url) + { + boolean isPmc = url.contains("db=pmc"); + Map> searchMap = isPmc ? 
_pmcSearchResults : _pubmedSearchResults; + + JSONArray idList = new JSONArray(); + for (Map.Entry> entry : searchMap.entrySet()) + { + if (url.contains(entry.getKey())) + { + entry.getValue().forEach(idList::put); + } + } + + JSONObject esearchResult = new JSONObject(); + esearchResult.put("idlist", idList); + return new JSONObject().put("esearchresult", esearchResult); + } + + private JSONObject handleESummary(String url) + { + boolean isPmc = url.contains("db=pmc"); + Map metadataMap = isPmc ? _pmcMetadata : _pubmedMetadata; + + JSONObject result = new JSONObject(); + for (Map.Entry entry : metadataMap.entrySet()) + { + if (url.contains(entry.getKey())) + { + result.put(entry.getKey(), entry.getValue()); + } + } + + return new JSONObject().put("result", result); + } + + /** + * Build a citation JSON response matching the NCBI Literature Citation Exporter format. + * The real API returns {@code {"nlm":{"orig":"citation text..."}}}. + * If no citation is registered for the ID, returns an empty JSON object. + */ + private JSONObject handleCitation(String url) + { + // Extract the publication ID from the URL (last segment after "id=") + String id = null; + int idIdx = url.indexOf("id="); + if (idIdx >= 0) + { + id = url.substring(idIdx + 3); + // Remove any trailing query parameters + int ampIdx = id.indexOf('&'); + if (ampIdx >= 0) + { + id = id.substring(0, ampIdx); + } + } + + String citation = id != null ? 
_citations.get(id) : null; + if (citation != null) + { + return new JSONObject().put("nlm", new JSONObject().put("orig", citation)); + } + return new JSONObject(); + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java new file mode 100644 index 00000000..8647973d --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiConstants.java @@ -0,0 +1,48 @@ +package org.labkey.panoramapublic.ncbi; + +import org.jetbrains.annotations.Nullable; + +public class NcbiConstants +{ + /** Regex for validating PubMed IDs (1-8 digits). Not used for PMC IDs which may be longer. */ + public static final String PUBMED_ID = "^[0-9]{1,8}$"; // https://libguides.library.arizona.edu/c.php?g=406096&p=2779570 + + public enum DB + { + PubMed("PubMed"), + PMC("PubMed Central"); + + private final String _label; + + DB(String label) + { + _label = label; + } + + public String getLabel() + { + return _label; + } + + public static @Nullable DB fromString(@Nullable String database) + { + if (database == null) return null; + for (DB db : values()) + { + if (db.name().equals(database)) return db; + } + return null; + } + } + + public static String getPubmedLink(String pubmedId) + { + // Example: https://pubmed.ncbi.nlm.nih.gov/29331002 + return "https://pubmed.ncbi.nlm.nih.gov/" + pubmedId; + } + + public static String getPmcLink(String pmcId) + { + return "https://www.ncbi.nlm.nih.gov/pmc/articles/" + pmcId; + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java new file mode 100644 index 00000000..6256596b --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchService.java @@ -0,0 +1,35 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.logging.log4j.Logger; +import 
org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.util.Pair; +import org.labkey.panoramapublic.model.ExperimentAnnotations; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; + +import java.util.List; + +/** + * Service for searching PubMed Central (PMC) and PubMed for publications associated with Panorama Public datasets. + */ +public interface NcbiPublicationSearchService +{ + int MAX_RESULTS = 5; + + static NcbiPublicationSearchService get() + { + return NcbiPublicationSearchServiceImpl.getInstance(); + } + + @Nullable String getCitation(String publicationId, DB database); + + @Nullable Pair getPubMedLinkAndCitation(String pubmedId); + + /** + * Searches PMC and PubMed for a publication associated with the experiment. + * Returns the top match (highest priority) if multiple matches are found, or null if none. + */ + @Nullable PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger); + + List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations); +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java new file mode 100644 index 00000000..83fec024 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/ncbi/NcbiPublicationSearchServiceImpl.java @@ -0,0 +1,1391 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.config.ConnectionConfig; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.client5.http.impl.io.BasicHttpClientConnectionManager; 
+import org.apache.hc.client5.http.HttpResponseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.util.Timeout; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.junit.Assert; +import org.junit.Test; +import org.labkey.api.util.Pair; +import org.labkey.api.util.StringUtilsLabKey; +import org.labkey.api.util.logging.LogHelper; +import org.labkey.panoramapublic.datacite.DataCiteService; +import org.labkey.panoramapublic.model.ExperimentAnnotations; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; + +import java.io.IOException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.text.Normalizer; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_DOI; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PANORAMA_URL; +import static org.labkey.panoramapublic.ncbi.PublicationMatch.MATCH_PX_ID; + +/** + * Real implementation of {@link NcbiPublicationSearchService} that searches NCBI via HTTP. 
+ */ +public class NcbiPublicationSearchServiceImpl implements NcbiPublicationSearchService +{ + private static final Logger LOG = LogHelper.getLogger(NcbiPublicationSearchServiceImpl.class, "Search NCBI for publications associated with Panorama Public datasets"); + + // Static holder for the service instance + private static volatile NcbiPublicationSearchService _instance = new NcbiPublicationSearchServiceImpl(); + + public static NcbiPublicationSearchService getInstance() + { + return _instance; + } + + public static void setInstance(NcbiPublicationSearchService impl) + { + _instance = impl; + } + + // NCBI API endpoints + private static final String ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"; + private static final String ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"; + + // NCBI Literature Citation Exporter endpoints + private static final String PUBMED_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; + private static final String PMC_CITATION_EXPORTER_URL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=citation&id="; + + // API parameters + private static final int RATE_LIMIT_DELAY_MS = 400; // NCBI allows 3 requests/sec + private static final int TIMEOUT_MS = 10000; // 10 seconds + + // NCBI suggests using the 'tool' and 'email' parameters on E-utilities URLs + // https://www.nlm.nih.gov/dataguide/eutilities/utilities.html + private static final String TOOL = "PanoramaPublic"; + private static final String EMAIL = "panorama@proteinms.net"; + + // Preprint indicators + private static final String[] PREPRINT_INDICATORS = { + "preprint", "biorxiv", "medrxiv", "chemrxiv", "arxiv", + "research square", "preprints.org" + }; + + // Title keyword extraction + private static final int MIN_KEYWORD_LENGTH = 3; + private static final double KEYWORD_MATCH_THRESHOLD = 0.6; // 60% of keywords must match + private static final int MIN_KEYWORDS_FOR_MATCH = 2; 
+ + // Stop words for title keyword matching. These words are not meaningful discriminators between papers. + // Function words: articles, prepositions, conjunctions, auxiliary verbs, pronouns. + // Title fillers: common words in paper titles that are not discriminating. + private static final Set TITLE_STOP_WORDS = Set.of( + + // Function words + "the", "this", "that", "these", "those", "its", // articles + "for", "with", "from", "into", "upon", "via", "after", "about", // prepositions + "between", "through", "across", "under", "over", "during", + "and", "but", "than", // conjunctions + "are", "was", "were", "been", "have", "has", "can", "may", // auxiliary verbs + "our", "their", // pronouns + "not", "all", "also", "both", "each", "how", "use", "used", // other function words + "here", "well", "two", "one", "more", "most", "only", "such", "other", "which", + + // Title fillers + "using", "based", "reveals", "revealed", "show", "shows", "shown", + "role", "study", "studies" + ); + + private static Logger getLog(@Nullable Logger logger) + { + return logger != null ? logger : LOG; + } + + @Override + public @Nullable String getCitation(String publicationId, DB database) + { + return getCitation(publicationId, database, null); + } + + public @Nullable String getCitation(String publicationId, DB database, @Nullable Logger logger) + { + Logger log = getLog(logger); + + if (publicationId == null || !publicationId.matches(NcbiConstants.PUBMED_ID)) + { + return null; + } + + String baseUrl = database == DB.PMC ? PMC_CITATION_EXPORTER_URL : PUBMED_CITATION_EXPORTER_URL; + String queryUrl = baseUrl + publicationId; + + try + { + String response = getString(queryUrl); + return parseCitation(response, publicationId, database, log); + } + catch (IOException e) + { + log.error("Error submitting a request to NCBI Literature Citation Exporter. 
URL: " + queryUrl, e); + } + return null; + } + + @Override + public Pair getPubMedLinkAndCitation(String pubmedId) + { + String citation = getCitation(pubmedId, DB.PubMed); + return citation != null ? new Pair<>(NcbiConstants.getPubmedLink(pubmedId), citation) : null; + } + + static String parseCitation(String response, String publicationId, DB database) + { + return parseCitation(response, publicationId, database, null); + } + + static String parseCitation(String response, String publicationId, DB database, @Nullable Logger logger) + { + Logger log = getLog(logger); + try + { + var jsonObject = new JSONObject(response); + var nlmInfo = jsonObject.optJSONObject("nlm"); + return null != nlmInfo ? nlmInfo.getString("orig") : null; + } + catch (JSONException e) + { + log.error("Error parsing response from NCBI Literature Citation Exporter for " + database.getLabel() + " ID " + publicationId, e); + } + return null; + } + + @Override + public PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, @Nullable Logger logger) + { + List matches = searchForPublication(expAnnotations, 1, logger, true); + return matches.isEmpty() ? 
null : matches.get(0); + } + + @Override + public List searchForPublication(@NotNull ExperimentAnnotations expAnnotations, int maxResults, @Nullable Logger logger, boolean getCitations) + { + Logger log = getLog(logger); + maxResults = Math.max(1, Math.min(maxResults, NcbiPublicationSearchService.MAX_RESULTS)); + log.info("Starting publication search for experiment: {}", expAnnotations.getId()); + + // Search PubMed Central first + List matchedArticles = searchPmc(expAnnotations, log); + + // If no PMC results, fall back to PubMed + if (matchedArticles.isEmpty()) + { + log.info("No PMC articles found, trying PubMed fallback"); + matchedArticles = searchPubMed(expAnnotations, log); + } + + // Build and return result + if (matchedArticles.isEmpty()) + { + log.info("No publications found"); + return Collections.emptyList(); + } + + if (matchedArticles.size() > maxResults) + { + matchedArticles = matchedArticles.subList(0, maxResults); + } + + if (getCitations) + { + // Fetch citations for each match + for (PublicationMatch match : matchedArticles) + { + rateLimit(); + match.setCitation(getCitation(match.getPublicationId(), match.getPublicationType(), log)); + } + } + + log.info("Returning {}", StringUtilsLabKey.pluralize(matchedArticles.size(), "publication")); + return matchedArticles; + } + + /** + * Search PubMed Central + */ + private @NotNull List searchPmc(@NotNull ExperimentAnnotations expAnnotations, Logger log) + { + Map searchTermsByStrategy = buildSearchTerms(expAnnotations); + + // Search PMC using each strategy, accumulating results + Map> pmcIdsByStrategy = new HashMap<>(); + for (Map.Entry entry : searchTermsByStrategy.entrySet()) + { + String strategy = entry.getKey(); + String searchTerm = entry.getValue(); + log.debug("Searching PMC by {}: {}", strategy, searchTerm); + List ids = searchPmc(quote(searchTerm), log); + if (!ids.isEmpty()) + { + pmcIdsByStrategy.put(strategy, ids); + log.debug("Found {} PMC articles by {}", ids.size(), strategy); + } + 
rateLimit(); + } + + // Build reverse map: PMC ID -> strategies that found it + Map> idToStrategies = new HashMap<>(); + pmcIdsByStrategy.forEach((strategy, ids) -> + ids.forEach(id -> idToStrategies.computeIfAbsent(id, k -> new ArrayList<>()).add(strategy))); + + log.info("Total unique PMC IDs found: {}", idToStrategies.size()); + + // Fetch and verify PMC articles + List pmcArticles = fetchAndVerifyPmcArticles(idToStrategies.keySet(), idToStrategies, expAnnotations, log); + + // Apply priority filtering + return applyPriorityFiltering(pmcArticles, expAnnotations.getCreated(), log); + } + + private @NotNull Map buildSearchTerms(@NotNull ExperimentAnnotations expAnnotations) + { + Map terms = new LinkedHashMap<>(); + + if (!StringUtils.isBlank(expAnnotations.getPxid())) + terms.put(MATCH_PX_ID, expAnnotations.getPxid()); + if (expAnnotations.getShortUrl() != null) + terms.put(MATCH_PANORAMA_URL, expAnnotations.getShortUrl().renderShortURL()); + if (!StringUtils.isBlank(expAnnotations.getDoi())) + terms.put(MATCH_DOI, expAnnotations.getDoi().startsWith("http") + ? 
expAnnotations.getDoi() + : DataCiteService.toUrl(expAnnotations.getDoi())); + return terms; + } + + /** + * Search PubMed Central with the given query + */ + private List searchPmc(String query, Logger log) + { + return executeSearch(query, "pmc", log); + } + + /** + * Search PubMed with the given query + */ + private List searchPubMed(String query, Logger log) + { + return executeSearch(query, "pubmed", log); + } + + /** + * Execute search using NCBI ESearch API + */ + private List executeSearch(String query, String database, Logger log) + { + String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = ESEARCH_URL + + "?db=" + database + + "&term=" + encodedQuery + + "&retmax=" + NcbiPublicationSearchService.MAX_RESULTS + + "&retmode=json" + + "&tool=" + TOOL + + "&email=" + URLEncoder.encode(EMAIL, StandardCharsets.UTF_8); + + try + { + JSONObject json = getJson(url); + JSONObject eSearchResult = json.getJSONObject("esearchresult"); + JSONArray idList = eSearchResult.getJSONArray("idlist"); + + List ids = new ArrayList<>(); + for (int i = 0; i < idList.length(); i++) + { + ids.add(idList.getString(i)); + } + return ids; + } + catch (IOException | JSONException e) + { + log.error("Error searching " + database + " with query: " + query , e); + return Collections.emptyList(); + } + } + + /** + * Execute an HTTP GET request and parse the response as JSON. + * @throws IOException if the request fails or the server returns a non-2xx response + * @throws JSONException if the response body is not valid JSON + */ + protected JSONObject getJson(String url) throws IOException + { + return new JSONObject(getString(url)); + } + + /** + * Execute an HTTP GET request and return the response body as a string. 
+ * @throws IOException if the request fails or the server returns a non-2xx response + */ + protected String getString(String url) throws IOException + { + ConnectionConfig connectionConfig = ConnectionConfig.custom() + .setConnectTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .setSocketTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .build(); + + RequestConfig requestConfig = RequestConfig.custom() + .setResponseTimeout(Timeout.ofMilliseconds(TIMEOUT_MS)) + .build(); + + BasicHttpClientConnectionManager connectionManager = new BasicHttpClientConnectionManager(); + connectionManager.setConnectionConfig(connectionConfig); + + try (CloseableHttpClient client = HttpClientBuilder.create() + .setDefaultRequestConfig(requestConfig) + .setConnectionManager(connectionManager) + .build()) + { + HttpGet getRequest = new HttpGet(url); + return client.execute(getRequest, response -> { + int status = response.getCode(); + if (status < 200 || status >= 300) + { + throw new HttpResponseException(status, response.getReasonPhrase()); + } + return EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + }); + } + } + + /** + * Fetch metadata for PMC articles using ESummary API + */ + private Map fetchPmcMetadata(Collection pmcIds, Logger log) + { + return fetchMetadata(pmcIds, "pmc", log); + } + + /** + * Fetch metadata for PubMed articles using ESummary API + */ + private Map fetchPubMedMetadata(Collection pmids, Logger log) + { + return fetchMetadata(pmids, "pubmed", log); + } + + /** + * Fetch metadata using NCBI ESummary API (batch request) + */ + private Map fetchMetadata(Collection ids, String database, Logger log) + { + if (ids.isEmpty()) return Collections.emptyMap(); + + String idString = String.join(",", ids); + String url = ESUMMARY_URL + + "?db=" + database + + "&id=" + URLEncoder.encode(idString, StandardCharsets.UTF_8) + + "&retmode=json" + + "&tool=" + TOOL + + "&email=" + URLEncoder.encode(EMAIL, StandardCharsets.UTF_8); + + try + { + JSONObject json = 
getJson(url); + JSONObject result = json.optJSONObject("result"); + if (result == null) return Collections.emptyMap(); + + // Extract metadata for each ID + Map metadata = new HashMap<>(); + for (String id : ids) + { + JSONObject articleData = result.optJSONObject(id); + if (articleData != null) + { + metadata.put(id, articleData); + } + } + return metadata; + } + catch (IOException | JSONException e) + { + log.error("Error fetching {} metadata for IDs: {}", database, ids, e); + return Collections.emptyMap(); + } + } + + /** + * Fetch and verify PMC articles (filter preprints, check author/title matches) + */ + private List fetchAndVerifyPmcArticles( + Set pmcIds, + Map> idToStrategies, + ExperimentAnnotations expAnnotations, + Logger log) + { + if (pmcIds.isEmpty()) return Collections.emptyList(); + + // Fetch all metadata in batch + Map metadata = fetchPmcMetadata(pmcIds, log); + + List articles = new ArrayList<>(); + String firstName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getFirstName() : null; + String lastName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getLastName() : null; + + for (String pmcId : pmcIds) + { + JSONObject articleData = metadata.get(pmcId); + if (articleData == null) continue; + + // Filter out preprints + if (isPreprint(articleData)) + { + String source = articleData.optString("source", ""); + String journal = articleData.optString("fulljournalname", ""); + log.info("Excluded preprint PMC{}: source=\"{}\", journal=\"{}\"", pmcId, source, journal); + continue; + } + + // Check author and title matches + boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); + boolean titleMatch = checkTitleMatch(articleData, expAnnotations.getTitle()); + + // Extract PubMed ID, if found + String pubMedId = extractPubMedId(articleData); + String publicationId = pubMedId == null ? pmcId : pubMedId; + DB publicationType = pubMedId == null ? 
DB.PMC : DB.PubMed; + + // Get strategies that found this article + List strategies = idToStrategies.getOrDefault(pmcId, Collections.emptyList()); + + // Map strategies to boolean flags + boolean foundByPxId = strategies.contains(MATCH_PX_ID); + boolean foundByUrl = strategies.contains(MATCH_PANORAMA_URL); + boolean foundByDoi = strategies.contains(MATCH_DOI); + + Date pubDate = parsePublicationDate(articleData, log); + + PublicationMatch article = new PublicationMatch( + publicationId, + publicationType, + foundByPxId, + foundByUrl, + foundByDoi, + authorMatch, + titleMatch, + pubDate + ); + articles.add(article); + + log.info("PMC{} -> {} {} | Match info: {}", pmcId, publicationType, publicationId, article.getMatchInfo()); + } + + return articles; + } + + /** + * Apply priority filtering to narrow down results + */ + static List applyPriorityFiltering(List articles, @NotNull Date referenceDate, Logger log) + { + if (articles.size() <= 1) return articles; + + // Priority 1: Filter to articles found by multiple data IDs (most reliable) + List multipleIds = articles.stream() + .filter(a -> countDataIdMatches(a) >= 2) + .collect(Collectors.toList()); + + if (!multipleIds.isEmpty()) + { + log.debug("Filtered to {} article(s) found by multiple IDs", multipleIds.size()); + articles = multipleIds; + } + + if (articles.size() <= 1) return sortByDateProximity(articles, referenceDate); + + // Priority 2: Filter to articles with both Author AND Title match + List bothMatches = articles.stream() + .filter(a -> a.matchesAuthor() && a.matchesTitle()) + .collect(Collectors.toList()); + + if (!bothMatches.isEmpty()) + { + log.debug("Filtered to {} article(s) with both Author and Title match", bothMatches.size()); + articles = bothMatches; + } + + return sortByDateProximity(articles, referenceDate); + } + + /** + * Sort articles by proximity of publication date to the experiment creation date (proxy for data submission date) + * Articles with dates closest to the experiment 
creation date come first. + * Articles without a publication date are sorted to the end. + */ + static List sortByDateProximity(List articles, @NotNull Date experimentDate) + { + if (articles.size() <= 1) + { + return articles; + } + long refTime = experimentDate.getTime(); + articles = new ArrayList<>(articles); + articles.sort(Comparator.comparingLong(a -> + a.getPublicationDate() != null ? Math.abs(a.getPublicationDate().getTime() - refTime) : Long.MAX_VALUE + )); + return articles; + } + + private static int countDataIdMatches(PublicationMatch a) + { + int count = 0; + if (a.matchesProteomeXchangeId()) count++; + if (a.matchesPanoramaUrl()) count++; + if (a.matchesDoi()) count++; + return count; + } + + /** + * Fall back to PubMed search if PMC finds nothing + */ + private List searchPubMed(ExperimentAnnotations expAnnotations, Logger log) + { + String firstName = expAnnotations.getSubmitterUser() != null + ? expAnnotations.getSubmitterUser().getFirstName() : null; + String lastName = expAnnotations.getSubmitterUser() != null + ? 
expAnnotations.getSubmitterUser().getLastName() : null; + String title = expAnnotations.getTitle(); + + if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName) || StringUtils.isBlank(title)) + { + log.info("Cannot perform PubMed fallback - missing author or title information"); + return Collections.emptyList(); + } + + // Search PubMed: "LastName FirstName[Author] AND Title NOT preprint[Publication Type]" + String query = String.format("%s %s[Author] AND %s NOT preprint[Publication Type]", + lastName, firstName, title); + + log.debug("PubMed fallback query: {}", query); + List pmids = searchPubMed(query, log); + + if (pmids.isEmpty()) + { + log.info("PubMed fallback found no results"); + return Collections.emptyList(); + } + + log.info("PubMed fallback found {} result(s), verifying...", pmids.size()); + + // Fetch metadata and verify + Map metadata = fetchPubMedMetadata(pmids, log); + + List articles = new ArrayList<>(); + for (String pmid : pmids) + { + JSONObject articleData = metadata.get(pmid); + if (articleData == null) continue; + + // Filter out preprints + if (isPreprint(articleData)) + { + log.info("Excluded preprint PMID {}", pmid); + continue; + } + + // Verify both author AND title match + boolean authorMatch = checkAuthorMatch(articleData, firstName, lastName); + boolean titleMatch = checkTitleMatch(articleData, title); + + // Only accept if BOTH match + if (authorMatch && titleMatch) + { + Date pubDate = parsePublicationDate(articleData, log); + PublicationMatch article = new PublicationMatch( + pmid, + DB.PubMed, + false, // Not found by PX ID + false, // Not found by Panorama URL + false, // Not found by DOI + true, // Matched by author + true, // Matched by title + pubDate + ); + articles.add(article); + log.info("PMID {} verified with both Author and Title match", pmid); + } + } + + log.info("Verified {} of {} PubMed results", articles.size(), pmids.size()); + return articles; + } + + /** + * Check if article is a preprint + */ + 
static boolean isPreprint(JSONObject metadata) + { + String source = metadata.optString("source", "").toLowerCase(); + String journal = metadata.optString("fulljournalname", "").toLowerCase(); + + for (String indicator : PREPRINT_INDICATORS) + { + if (source.contains(indicator) || journal.contains(indicator)) + { + return true; + } + } + return false; + } + + /** + * Check if article matches the author (submitter). + * NCBI ESummary returns author names in "LastName Initials" format (e.g. "Jones A", "van der Berg M"). + * We match by checking that the author name starts with the submitter's last name followed by a space, + * and that the remainder starts with the submitter's first initial. + * Diacritics are stripped before comparison so that "García" matches "Garcia". + */ + static boolean checkAuthorMatch(JSONObject metadata, String firstName, String lastName) + { + if (StringUtils.isBlank(firstName) || StringUtils.isBlank(lastName)) + { + return false; + } + + JSONArray authors = metadata.optJSONArray("authors"); + if (authors == null || authors.isEmpty()) + { + return false; + } + + String firstInitial = stripDiacritics(firstName.substring(0, 1).toLowerCase()); + String lastNameLower = stripDiacritics(lastName.toLowerCase()); + + for (int i = 0; i < authors.length(); i++) + { + JSONObject author = authors.optJSONObject(i); + if (author == null) continue; + + String authorName = stripDiacritics(author.optString("name", "").toLowerCase()); + + // Match format: "LastName FirstInitial" - require a space after the last name + // to avoid prefix collisions (e.g. 
"Li" matching "Liang") + if (authorName.startsWith(lastNameLower)) + { + // Last name must be followed by a space (before initials) or be the entire name + if (authorName.length() == lastNameLower.length()) + { + return true; // Exact last name match, no initials + } + if (authorName.charAt(lastNameLower.length()) == ' ') + { + String afterLastName = authorName.substring(lastNameLower.length()).trim(); + if (afterLastName.startsWith(firstInitial)) + { + return true; + } + } + } + } + + return false; + } + + /** + * Strip diacritical marks from a string (e.g. "Villén" → "Villen", "Müller" → "Muller"). + * Uses Unicode NFD decomposition to separate base characters from combining marks, + * then removes the marks. + */ + static String stripDiacritics(String str) + { + if (str == null) return ""; + return Normalizer.normalize(str, Normalizer.Form.NFD) + .replaceAll("\\p{M}", ""); + } + + /** + * Check if article title matches the dataset title + */ + static boolean checkTitleMatch(JSONObject metadata, String datasetTitle) + { + if (StringUtils.isBlank(datasetTitle)) + { + return false; + } + + // Try title first, then sorttitle as fallback + String title = normalizeTitle(metadata.optString("title")); + String normalizedArticleTitle = title.isEmpty() + ? normalizeTitle(metadata.optString("sorttitle")) + : title; + + if (normalizedArticleTitle.isEmpty()) + { + return false; + } + + String normalizedDatasetTitle = normalizeTitle(datasetTitle); + + // Try exact match first + if (normalizedArticleTitle.equals(normalizedDatasetTitle)) + { + return true; + } + + // Bi-directional keyword matching: match if either direction meets the threshold. 
+ List articleKeywords = extractTitleKeywords(normalizedArticleTitle); + List datasetKeywords = extractTitleKeywords(normalizedDatasetTitle); + + return keywordsMatch(datasetKeywords, articleKeywords) + || keywordsMatch(articleKeywords, datasetKeywords); + } + + /** + * Check if at least 60% of {@code sourceKeywords} are present in {@code targetKeywords} (with a minimum of 2). + */ + private static boolean keywordsMatch(List sourceKeywords, List targetKeywords) + { + if (sourceKeywords.size() < MIN_KEYWORDS_FOR_MATCH || targetKeywords.size() < MIN_KEYWORDS_FOR_MATCH) + { + return false; + } + + Set targetSet = new java.util.HashSet<>(targetKeywords); + + long matchCount = sourceKeywords.stream() + .filter(targetSet::contains) + .count(); + + int required = (int) Math.ceil(sourceKeywords.size() * KEYWORD_MATCH_THRESHOLD); + required = Math.max(required, Math.min(2, sourceKeywords.size())); + return matchCount >= required; + } + + /** + * Normalize title for comparison: strip HTML tags, strip diacritics, lowercase, + * replace punctuation with space (so "data-independent" becomes "data independent"), + * and normalize whitespace. + */ + static String normalizeTitle(String title) + { + if (title == null) return ""; + + return stripDiacritics(title.toLowerCase()) + .replaceAll("<[^>]+>", " ") // Strip HTML/XML tags + .replaceAll("[^\\w\\s]", " ") // Replace punctuation with space (not remove) + .replaceAll("\\s+", " ") // Normalize whitespace + .trim(); + } + + /** + * Extract meaningful keywords from title for matching. + * Returns all qualifying words (no cap) — the caller uses a percentage threshold. 
+ */ + static List extractTitleKeywords(String title) + { + if (StringUtils.isBlank(title)) + { + return Collections.emptyList(); + } + + String[] words = title.toLowerCase().split("\\s+"); + List keywords = new ArrayList<>(); + + for (String word : words) + { + // Remove non-alphanumeric characters + word = word.replaceAll("[^a-z0-9]", ""); + + // Check length and exclude stop words + if (word.length() >= MIN_KEYWORD_LENGTH && !TITLE_STOP_WORDS.contains(word)) + { + keywords.add(word); + } + } + + return keywords; + } + + /** + * Extract PMID from PMC metadata + */ + static @Nullable String extractPubMedId(JSONObject pmcMetadata) + { + // PMC articles contain PMID in articleids array + JSONArray articleIds = pmcMetadata.optJSONArray("articleids"); + if (articleIds != null) + { + for (int i = 0; i < articleIds.length(); i++) + { + JSONObject idObj = articleIds.optJSONObject(i); + if (idObj != null && "pmid".equals(idObj.optString("idtype"))) + { + return idObj.optString("value"); + } + } + } + + return null; + } + + /** + * Parse publication date from ESummary metadata. + * Tries "sortpubdate" or "sortdate" first (format "YYYY/MM/DD HH:MM"), then falls back to + * "pubdate" and "epubdate" (formats "YYYY Mon DD", "YYYY Mon", or "YYYY"). 
+ */ + static @Nullable Date parsePublicationDate(JSONObject metadata, Logger log) + { + // Try sort dates first — these have a consistent "YYYY/MM/DD HH:MM" format + for (String field : new String[]{"sortpubdate", "sortdate"}) + { + String dateStr = metadata.optString(field, "").trim(); + if (!StringUtils.isBlank(dateStr)) + { + try + { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm"); + sdf.setLenient(false); + return sdf.parse(dateStr); + } + catch (ParseException ignored) + { + } + } + } + + // Fall back to pubdate / epubdate + String dateStr = ""; + for (String field : new String[]{"pubdate", "epubdate"}) + { + dateStr = metadata.optString(field, "").trim(); + if (!StringUtils.isBlank(dateStr)) break; + } + if (StringUtils.isBlank(dateStr)) + { + return null; + } + + for (String format : new String[]{"yyyy MMM dd", "yyyy MMM", "yyyy"}) + { + try + { + SimpleDateFormat sdf = new SimpleDateFormat(format); + sdf.setLenient(false); + return sdf.parse(dateStr); + } + catch (ParseException ignored) + { + } + } + log.debug("Unable to parse publication date: {}", dateStr); + return null; + } + + /** + * Rate limiting: wait 400ms between API requests + */ + private static void rateLimit() + { + try + { + Thread.sleep(RATE_LIMIT_DELAY_MS); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + } + } + + /** + * Wrap string in quotes for exact match search + */ + private static String quote(String str) + { + return "\"" + str + "\""; + } + + public static class TestCase extends Assert + { + // -- parseCitation tests -- + + @Test + public void testParseCitation() + { + // Valid NLM citation response + String json = "{\"nlm\":{\"orig\":\"Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. PMID: 23689285; PMCID: PMC3769335.\"}}"; + assertEquals("Abbatiello SE, Mani DR. Mol Cell Proteomics. 2013 Sep;12(9):2623-39. 
PMID: 23689285; PMCID: PMC3769335.", parseCitation(json, "23689285", DB.PubMed)); + + // Missing nlm key + assertEquals(null, parseCitation("{\"ama\":{\"orig\":\"something\"}}", "23689285", DB.PubMed)); + + // Malformed JSON + assertEquals(null, parseCitation("not json", "23689285", DB.PubMed)); + + // Empty nlm object (missing "orig" key) + assertEquals(null, parseCitation("{\"nlm\":{}}", "23689285", DB.PubMed)); + } + + // -- isPreprint tests -- + + @Test + public void testIsPreprint() + { + // Non-preprint + assertFalse(isPreprint(articleMetadata("J Proteome Res", "Journal of Proteome Research"))); + + // Preprint by source + assertTrue(isPreprint(articleMetadata("medRxiv", ""))); + assertTrue(isPreprint(articleMetadata("bioRxiv", "bioRxiv : the preprint server for biology"))); + + // Preprint by journal name + assertTrue(isPreprint(articleMetadata("", "Research Square"))); + assertTrue(isPreprint(articleMetadata("", "ChemRxiv preprint"))); + + // Case insensitive + assertTrue(isPreprint(articleMetadata("BIORXIV", ""))); + + // Empty fields + assertFalse(isPreprint(articleMetadata("", ""))); + } + + // -- checkAuthorMatch tests -- + + @Test + public void testCheckAuthorMatch() + { + // Standard match: "Jones A" matches firstName=Andrew, lastName=Jones + JSONObject metadata = metadataWithAuthors("Smith CD", "Jones A", "Brown EF"); + assertTrue(checkAuthorMatch(metadata, "Andrew", "Jones")); + + // Match with multiple initials: "Jones AB" matches firstName=Andrew, lastName=Jones + assertTrue(checkAuthorMatch(metadataWithAuthors("Jones AB"), "Andrew", "Jones")); + + // Match is case-insensitive + assertTrue(checkAuthorMatch(metadataWithAuthors("jones a"), "Andrew", "Jones")); + assertTrue(checkAuthorMatch(metadataWithAuthors("JONES A"), "andrew", "jones")); + + // Author with full first name: "Jones Andrew" + assertTrue(checkAuthorMatch(metadataWithAuthors("Jones Andrew"), "Andrew", "Jones")); + + // No matching author + 
assertFalse(checkAuthorMatch(metadataWithAuthors("Smith CD", "Brown EF"), "Andrew", "Jones")); + + // Blank first or last name + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones A"), "", "Jones")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones A"), "Andrew", "")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Jones A"), null, "Jones")); + + // Empty authors array + assertFalse(checkAuthorMatch(new JSONObject(), "Andrew", "Jones")); + + // Short last name prefix collisions — last name must be followed by a space + assertFalse("'Li' should not match 'Liang B'", + checkAuthorMatch(metadataWithAuthors("Liang B"), "Alice", "Li")); + assertFalse("'Li' should not match 'Liu C'", + checkAuthorMatch(metadataWithAuthors("Liu C"), "Andrew", "Li")); + assertTrue(checkAuthorMatch(metadataWithAuthors("Li A"), "Alice", "Li")); + + // Compound last names: hyphenated and apostrophe + assertTrue(checkAuthorMatch(metadataWithAuthors("Smith-Jones A"), "Andrew", "Smith-Jones")); + assertFalse(checkAuthorMatch(metadataWithAuthors("Smith-Jones A"), "Andrew", "Smith")); + assertTrue(checkAuthorMatch(metadataWithAuthors("O'Brien K"), "Kate", "O'Brien")); + + // Name particles + assertTrue(checkAuthorMatch(metadataWithAuthors("von Haller M"), "Mark", "von Haller")); + assertTrue(checkAuthorMatch(metadataWithAuthors("de Silva R"), "Raj", "de Silva")); + + // Diacritics — accented characters match unaccented and vice versa + assertTrue(checkAuthorMatch(metadataWithAuthors("Villén J"), "Judit", "Villén")); + assertTrue(checkAuthorMatch(metadataWithAuthors("Villén J"), "Judit", "Villen")); + assertTrue(checkAuthorMatch(metadataWithAuthors("Villen J"), "Judit", "Villén")); + assertTrue(checkAuthorMatch(metadataWithAuthors("Müller E"), "Eric", "Muller")); + assertTrue(checkAuthorMatch(metadataWithAuthors("Muller E"), "Eric", "Müller")); + } + + // -- checkTitleMatch tests -- + + @Test + public void testCheckTitleMatchExact() + { + // Exact match (case-insensitive, 
punctuation-stripped) + JSONObject metadata = metadataWithTitle("Carafe in-silico spectral library generation for DIA"); + assertTrue(checkTitleMatch(metadata, "Carafe in-silico spectral library generation for DIA")); + assertTrue(checkTitleMatch(metadata, "carafe in silico spectral library generation for dia")); + assertTrue(checkTitleMatch(metadata, "Carafe: in silico spectral library generation for DIA!")); + } + + @Test + public void testCheckTitleMatchKeywords() + { + // Article: "Carafe enables high quality in silico spectral library generation for data-independent acquisition proteomics" + // Dataset: "Carafe: a tool for in silico spectral library generation for DIA proteomics" + // Dataset keywords: "carafe", "tool", "silico", "spectral", "library", "generation", "dia", "proteomics" (8 keywords) + // Matches in article: "carafe", "silico", "spectral", "library", "generation", "proteomics" (6 of 8 = 73%) -> pass + JSONObject metadata = metadataWithTitle("Carafe enables high quality in silico spectral library generation for data-independent acquisition proteomics"); + assertTrue(checkTitleMatch(metadata, "Carafe: a tool for in silico spectral library generation for DIA proteomics")); + + // Keywords not present in article title — below 60% + assertFalse(checkTitleMatch(metadata, "Carafe for generating spectrum libraries from DIA data")); + + // Dataset title tool short + assertFalse(checkTitleMatch(metadata, "Carafe")); + } + + @Test + public void testCheckTitleMatchBidirectional() + { + // Forward direction fails: dataset is very specific, article title is short. 
+ // Dataset keywords: "comprehensive", "phosphoproteomic", "analysis", "novel", "biomarkers", "lung", "cancer", "cell", "lines" (9) + // Only "lung", "cancer" found in article (2 of 9 = 22%) -> forward fails + // + // Reverse direction passes: article keywords: "lung", "cancer", "proteomics" (3) + // "lung" and "cancer" found in dataset (2 of 3 = 67%) -> passes 60% threshold + JSONObject metadata = metadataWithTitle("Lung cancer proteomics"); + assertTrue(checkTitleMatch(metadata, + "Comprehensive phosphoproteomic analysis reveals novel biomarkers in lung cancer cell lines")); + + // Both directions fail — completely unrelated titles + assertFalse(checkTitleMatch(metadataWithTitle("Cardiac tissue lipidomics"), + "Hepatic transcriptomics in zebrafish embryos")); + } + + @Test + public void testCheckTitleMatchEdgeCases() + { + // Blank dataset title + assertFalse(checkTitleMatch(metadataWithTitle("Some article"), "")); + assertFalse(checkTitleMatch(metadataWithTitle("Some article"), null)); + + // Missing title in metadata + assertFalse(checkTitleMatch(new JSONObject(), "Some title")); + + // Stop words in titles should be ignored — only meaningful keywords matter. 
+ // All words other than "Cats" and "Dogs" will be ignored + assertFalse(checkTitleMatch(metadataWithTitle("The Role of Cats: A study based on all other studies"), + "The Role of Dogs: A study based on all other studies")); + } + + @Test + public void testCheckTitleMatchHyphens() + { + // "data-independent" in one title should match "data independent" in the other + JSONObject metadata = metadataWithTitle("Data-independent acquisition proteomics workflow"); + assertTrue("Hyphenated should match unhyphenated", + checkTitleMatch(metadata, "Data independent acquisition proteomics workflow")); + } + + @Test + public void testCheckTitleMatchHtmlTags() + { + // NCBI sometimes returns titles with HTML tags like in vivo + JSONObject metadata = metadataWithTitle("In vivo analysis of protein interactions"); + assertTrue("HTML tags should be stripped for matching", + checkTitleMatch(metadata, "In vivo analysis of protein interactions")); + + // Multiple tags + metadata = metadataWithTitle("Role of E. coli chaperones in protein folding"); + assertTrue(checkTitleMatch(metadata, "Role of E. 
coli chaperones in protein folding")); + } + + @Test + public void testCheckTitleMatchDiacritics() + { + // Accented characters in titles + assertTrue("Accented title should match unaccented", + checkTitleMatch(metadataWithTitle("Protéomique des échantillons hépatiques"), + "Proteomique des echantillons hepatiques")); + } + + @Test + public void testCheckTitleMatchSorttitleFallback() + { + // When "title" is missing, falls back to "sorttitle" + JSONObject metadata = new JSONObject(); + metadata.put("title", ""); + metadata.put("sorttitle", "phosphoproteomics of lung cancer"); + assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); + + // When "title" is present, it is used (not "sorttitle") + metadata = new JSONObject(); + metadata.put("title", "Phosphoproteomics of Lung Cancer"); + metadata.put("sorttitle", "completely different sort title"); + assertTrue(checkTitleMatch(metadata, "Phosphoproteomics of Lung Cancer")); + } + + // -- extractTitleKeywords tests -- + + @Test + public void testExtractTitleKeywords() + { + // Normal title — all non-stop words >= 3 chars are extracted + List keywords = extractTitleKeywords("Novel phosphoproteomics workflow for cardiac tissue samples"); + assertTrue(keywords.contains("novel")); + assertTrue(keywords.contains("phosphoproteomics")); + assertTrue(keywords.contains("workflow")); + assertTrue(keywords.contains("cardiac")); + assertTrue(keywords.contains("tissue")); + assertTrue(keywords.contains("samples")); + assertFalse("'for' is too short", keywords.contains("for")); + + // Domain-specific words are NOT stop words — they should be kept + keywords = extractTitleKeywords("Quantitative proteomics characterization identification analysis"); + assertTrue("'proteomics' is domain-specific, not a stop word", keywords.contains("proteomics")); + assertTrue("'characterization' is domain-specific", keywords.contains("characterization")); + assertTrue("'identification' is domain-specific", 
keywords.contains("identification")); + assertTrue("'analysis' is domain-specific", keywords.contains("analysis")); + assertTrue(keywords.contains("quantitative")); + + // Function words and title fillers are excluded + keywords = extractTitleKeywords("the study using based reveals role"); + assertFalse("'the' is a function word", keywords.contains("the")); + assertFalse("'study' is a title filler", keywords.contains("study")); + assertFalse("'using' is a title filler", keywords.contains("using")); + assertFalse("'based' is a title filler", keywords.contains("based")); + assertFalse("'reveals' is a title filler", keywords.contains("reveals")); + assertFalse("'role' is a title filler", keywords.contains("role")); + + // Short meaningful words (>= 3 chars) are kept + keywords = extractTitleKeywords("DIA analysis of lung cell iron metabolism in mice"); + assertTrue("'dia' (3 chars) should be kept", keywords.contains("dia")); + assertTrue("'lung' (4 chars) should be kept", keywords.contains("lung")); + assertTrue("'cell' (4 chars) should be kept", keywords.contains("cell")); + assertTrue("'iron' (4 chars) should be kept", keywords.contains("iron")); + assertTrue("'mice' (4 chars) should be kept", keywords.contains("mice")); + assertFalse("'of' (2 chars) is too short", keywords.contains("of")); + assertFalse("'in' (2 chars) is too short", keywords.contains("in")); + + // No cap on number of keywords + keywords = extractTitleKeywords("alpha bravo charlie delta foxtrot hotel india juliet kilo lima"); + assertEquals(10, keywords.size()); + + // Blank input + assertTrue(extractTitleKeywords("").isEmpty()); + assertTrue(extractTitleKeywords(null).isEmpty()); + } + + // -- normalizeTitle tests -- + + @Test + public void testNormalizeTitle() + { + assertEquals("hello world", normalizeTitle("Hello, World!")); + assertEquals("test driven development", normalizeTitle("Test-Driven Development")); + assertEquals("multiple spaces become one", normalizeTitle(" Multiple spaces become 
one ")); + assertEquals("", normalizeTitle(null)); + assertEquals("", normalizeTitle("")); + + // HTML tags are stripped + assertEquals("in vivo analysis of protein", normalizeTitle("in vivo analysis of protein")); + assertEquals("e coli proteomics", normalizeTitle("E. coli proteomics")); + + // Apostrophes are replaced with space (like other punctuation) + assertEquals("garcia s analysis", normalizeTitle("García's Analysis")); + + // Hyphens become spaces: "data-independent" matches "data independent" + assertEquals("data independent acquisition", normalizeTitle("data-independent acquisition")); + assertEquals("data independent acquisition", normalizeTitle("data independent acquisition")); + } + + // -- extractPubMedId tests -- + + @Test + public void testExtractPubMedId() + { + // PMC metadata with PMID + JSONObject metadata = new JSONObject(); + JSONArray articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); + articleIds.put(new JSONObject().put("idtype", "pmid").put("value", "28691345")); + metadata.put("articleids", articleIds); + assertEquals("28691345", extractPubMedId(metadata)); + + // No PMID in articleids + metadata = new JSONObject(); + articleIds = new JSONArray(); + articleIds.put(new JSONObject().put("idtype", "pmcid").put("value", "PMC1234567")); + metadata.put("articleids", articleIds); + assertNull(extractPubMedId(metadata)); + + // No articleids key + assertNull(extractPubMedId(new JSONObject())); + } + + // -- parsePublicationDate tests -- + + @Test + public void testParsePublicationDate() + { + // sortpubdate is preferred (format "YYYY/MM/DD HH:MM") + assertNotNull(parsePublicationDate(metadataWithDate("sortpubdate", "2025/11/06 00:00"), LOG)); + + // sortdate is next + assertNotNull(parsePublicationDate(metadataWithDate("sortdate", "2025/11/06 00:00"), LOG)); + + // sortpubdate takes priority over pubdate + JSONObject metadata = new JSONObject(); + metadata.put("sortpubdate", 
"2025/11/06 00:00"); + metadata.put("pubdate", "2024 Jan"); + Date parsed = parsePublicationDate(metadata, LOG); + assertNotNull(parsed); + assertTrue("sortpubdate should be used over pubdate", parsed.toString().contains("2025")); + + // Falls back to "pubdate" when sort dates are absent + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan 15"), LOG)); + + // "YYYY Mon" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024 Jan"), LOG)); + + // "YYYY" format + assertNotNull(parsePublicationDate(metadataWithDate("pubdate", "2024"), LOG)); + + // Falls back to epubdate when pubdate is blank + assertNotNull(parsePublicationDate(metadataWithDate("epubdate", "2024 Mar 01"), LOG)); + + // Unparseable date + assertNull(parsePublicationDate(metadataWithDate("pubdate", "not-a-date"), LOG)); + + // Empty date fields + assertNull(parsePublicationDate(new JSONObject(), LOG)); + } + + // -- applyPriorityFiltering tests -- + + @Test + public void testApplyPriorityFiltering() + { + Date articleDate = new Date(); + + // Single article returned as-is + PublicationMatch single = createMatch("111", true, false, false, false, false, articleDate); + List result = applyPriorityFiltering(List.of(single), articleDate, LOG); + assertEquals(1, result.size()); + + // Articles found by multiple data IDs preferred over single-ID matches + PublicationMatch multiFieldMatch = createMatch("222", true, true, false, true, true, articleDate); + PublicationMatch singleFieldMatch = createMatch("333", true, false, false, true, true, articleDate); + result = applyPriorityFiltering(List.of(singleFieldMatch, multiFieldMatch), articleDate, LOG); + assertEquals(1, result.size()); + assertEquals("222", result.get(0).getPublicationId()); + + // Among single-ID matches, author+title both matching preferred + PublicationMatch bothMatch = createMatch("444", true, false, false, true, true, articleDate); + PublicationMatch authorOnly = createMatch("555", true, false, 
false, true, false, articleDate); + result = applyPriorityFiltering(List.of(authorOnly, bothMatch), articleDate, LOG); + assertEquals(1, result.size()); + assertEquals("444", result.get(0).getPublicationId()); + } + + @Test + public void testSortByDateProximity() + { + Date exptSubmitDate = new Date(); + Date closer = new Date(exptSubmitDate.getTime() - 86400000L); // 1 day before + Date farther = new Date(exptSubmitDate.getTime() - 86400000L * 365); // 1 year before + + PublicationMatch farMatch = createMatch("111", true, false, false, false, false, farther); + PublicationMatch closeMatch = createMatch("222", true, false, false, false, false, closer); + PublicationMatch noDate = createMatch("333", true, false, false, false, false, null); + + List result = sortByDateProximity(List.of(farMatch, noDate, closeMatch), exptSubmitDate); + assertEquals("222", result.get(0).getPublicationId()); // closest + assertEquals("111", result.get(1).getPublicationId()); // farther + assertEquals("333", result.get(2).getPublicationId()); // no date last + } + + // -- PublicationMatch round-trip tests -- + + @Test + public void testPublicationMatchRoundTrip() + { + PublicationMatch original = new PublicationMatch("12345", DB.PubMed, true, true, false, true, false, null); + assertEquals("ProteomeXchange ID, Panorama URL, Author", original.getMatchInfo()); + + PublicationMatch restored = PublicationMatch.fromMatchInfo("12345", DB.PubMed, original.getMatchInfo()); + assertTrue(restored.matchesProteomeXchangeId()); + assertTrue(restored.matchesPanoramaUrl()); + assertFalse(restored.matchesDoi()); + assertTrue(restored.matchesAuthor()); + assertFalse(restored.matchesTitle()); + + // All flags + original = new PublicationMatch("67890", DB.PMC, true, true, true, true, true, null); + assertEquals("ProteomeXchange ID, Panorama URL, DOI, Author, Title", original.getMatchInfo()); + restored = PublicationMatch.fromMatchInfo("67890", DB.PMC, original.getMatchInfo()); + 
assertTrue(restored.matchesProteomeXchangeId()); + assertTrue(restored.matchesPanoramaUrl()); + assertTrue(restored.matchesDoi()); + assertTrue(restored.matchesAuthor()); + assertTrue(restored.matchesTitle()); + + // Empty match info + restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, ""); + assertFalse(restored.matchesProteomeXchangeId()); + assertFalse(restored.matchesAuthor()); + + // Null match info + restored = PublicationMatch.fromMatchInfo("11111", DB.PubMed, null); + assertFalse(restored.matchesProteomeXchangeId()); + } + + // -- Helper methods for building test JSON -- + + private static JSONObject articleMetadata(String source, String fullJournalName) + { + JSONObject metadata = new JSONObject(); + metadata.put("source", source); + metadata.put("fulljournalname", fullJournalName); + return metadata; + } + + private static JSONObject metadataWithAuthors(String... authorNames) + { + JSONObject metadata = new JSONObject(); + JSONArray authors = new JSONArray(); + for (String name : authorNames) + { + authors.put(new JSONObject().put("name", name)); + } + metadata.put("authors", authors); + return metadata; + } + + private static JSONObject metadataWithTitle(String title) + { + JSONObject metadata = new JSONObject(); + metadata.put("title", title); + return metadata; + } + + private static JSONObject metadataWithDate(String field, String dateStr) + { + JSONObject metadata = new JSONObject(); + metadata.put(field, dateStr); + return metadata; + } + + private static PublicationMatch createMatch(String id, boolean pxId, boolean url, boolean doi, + boolean author, boolean title, @Nullable Date pubDate) + { + return new PublicationMatch(id, DB.PubMed, pxId, url, doi, author, title, pubDate); + } + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java new file mode 100644 index 00000000..18932859 --- /dev/null +++ 
b/panoramapublic/src/org/labkey/panoramapublic/ncbi/PublicationMatch.java @@ -0,0 +1,206 @@ +package org.labkey.panoramapublic.ncbi; + +import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.json.JSONObject; +import org.labkey.panoramapublic.model.DatasetStatus; +import org.labkey.panoramapublic.ncbi.NcbiConstants.DB; + +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +public class PublicationMatch +{ + public static final String MATCH_PX_ID = "ProteomeXchange ID"; + public static final String MATCH_PANORAMA_URL = "Panorama URL"; + public static final String MATCH_DOI = "DOI"; + public static final String MATCH_AUTHOR = "Author"; + public static final String MATCH_TITLE = "Title"; + + private final String _publicationId; + private final DB _publicationType; + private final boolean _matchesProteomeXchangeId; + private final boolean _matchesPanoramaUrl; + private final boolean _matchesDoi; + private final boolean _matchesAuthor; + private final boolean _matchesTitle; + private final @Nullable Date _publicationDate; + private @Nullable String _citation; + + + public PublicationMatch(String publicationId, DB publicationType, + boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, + boolean matchesAuthor, boolean matchesTitle, + @Nullable Date publicationDate, @Nullable String citation) + { + _publicationId = publicationId; + _publicationType = publicationType; + _matchesProteomeXchangeId = matchesProteomeXchangeId; + _matchesPanoramaUrl = matchesPanoramaUrl; + _matchesDoi = matchesDoi; + _matchesAuthor = matchesAuthor; + _matchesTitle = matchesTitle; + _publicationDate = publicationDate; + _citation = citation; + } + + public PublicationMatch(String publicationId, DB publicationType, + boolean matchesProteomeXchangeId, boolean matchesPanoramaUrl, boolean matchesDoi, + boolean matchesAuthor, boolean matchesTitle, + @Nullable 
Date publicationDate) + { + this(publicationId, publicationType, matchesProteomeXchangeId, matchesPanoramaUrl, matchesDoi, matchesAuthor, matchesTitle, publicationDate, null); + } + + public String getPublicationId() + { + return _publicationId; + } + + public DB getPublicationType() + { + return _publicationType; + } + + public boolean matchesProteomeXchangeId() + { + return _matchesProteomeXchangeId; + } + + public boolean matchesPanoramaUrl() + { + return _matchesPanoramaUrl; + } + + public boolean matchesDoi() + { + return _matchesDoi; + } + + public boolean matchesAuthor() + { + return _matchesAuthor; + } + + public boolean matchesTitle() + { + return _matchesTitle; + } + + public @Nullable Date getPublicationDate() + { + return _publicationDate; + } + + public @Nullable String getCitation() + { + return _citation; + } + + public void setCitation(@Nullable String citation) + { + _citation = citation; + } + + /** + * String representation of what matched for this article. + * This is what gets stored in the DatasetStatus table's PublicationMatchInfo column. 
+ * Example: "ProteomeXchange ID, Panorama URL, Author, Title" + */ + public String getMatchInfo() + { + List parts = new ArrayList<>(); + if (_matchesProteomeXchangeId) parts.add(MATCH_PX_ID); + if (_matchesPanoramaUrl) parts.add(MATCH_PANORAMA_URL); + if (_matchesDoi) parts.add(MATCH_DOI); + if (_matchesAuthor) parts.add(MATCH_AUTHOR); + if (_matchesTitle) parts.add(MATCH_TITLE); + return String.join(", ", parts); + } + + public String getPublicationUrl() + { + if (_publicationType == DB.PMC) + { + return NcbiConstants.getPmcLink(_publicationId); + } + return NcbiConstants.getPubmedLink(_publicationId); + } + + public String getPublicationIdLabel() + { + return _publicationType.getLabel() + " ID " + _publicationId; + } + + public boolean isPubMed() + { + return _publicationType != null && _publicationType == DB.PubMed; + } + + public JSONObject toJson() + { + JSONObject json = new JSONObject(); + json.put("publicationId", _publicationId); + json.put("publicationType", _publicationType.name()); + json.put("publicationIdLabel", getPublicationIdLabel()); + json.put("publicationUrl", getPublicationUrl()); + json.put("matchInfo", getMatchInfo()); + if (_citation != null) + { + json.put("citation", _citation); + } + return json; + } + + /** + * Build a PublicationMatch from a publication ID, type, citation, and match info string. 
+ * @see #getMatchInfo() + */ + public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull DB publicationType, + @Nullable String matchInfo, @Nullable String citation) + { + boolean pxId = false, url = false, doi = false, author = false, title = false; + if (!StringUtils.isBlank(matchInfo)) + { + for (String segment : matchInfo.split(",")) + { + switch (segment.trim()) + { + case MATCH_PX_ID -> pxId = true; + case MATCH_PANORAMA_URL -> url = true; + case MATCH_DOI -> doi = true; + case MATCH_AUTHOR -> author = true; + case MATCH_TITLE -> title = true; + default -> { /* unknown token – ignore */ } + } + } + } + return new PublicationMatch(publicationId, publicationType, pxId, url, doi, author, title, null, citation); + } + + public static PublicationMatch fromMatchInfo(@NotNull String publicationId, @NotNull DB publicationType, + @Nullable String matchInfo) + { + return fromMatchInfo(publicationId, publicationType, matchInfo, null); + } + + /** + * Build a PublicationMatch from a persisted DatasetStatus. 
+ */ + public static @Nullable PublicationMatch fromDatasetStatus(@NotNull DatasetStatus datasetStatus) + { + if (StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + { + return null; + } + DB type = DB.fromString(datasetStatus.getPublicationType()); + if (type == null) + { + return null; + } + return fromMatchInfo(datasetStatus.getPotentialPublicationId(), type, + datasetStatus.getPublicationMatchInfo(), datasetStatus.getCitation()); + } +} diff --git a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java index 47d9d962..6b64d9e1 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java +++ b/panoramapublic/src/org/labkey/panoramapublic/pipeline/PrivateDataReminderJob.java @@ -25,6 +25,8 @@ import org.labkey.panoramapublic.model.ExperimentAnnotations; import org.labkey.panoramapublic.model.Journal; import org.labkey.panoramapublic.model.JournalSubmission; +import org.labkey.panoramapublic.ncbi.NcbiPublicationSearchService; +import org.labkey.panoramapublic.ncbi.PublicationMatch; import org.labkey.panoramapublic.query.DatasetStatusManager; import org.labkey.panoramapublic.query.ExperimentAnnotationsManager; import org.labkey.panoramapublic.query.JournalManager; @@ -43,6 +45,7 @@ public class PrivateDataReminderJob extends PipelineJob { private boolean _test; + private boolean _forcePublicationCheck; private List _experimentAnnotationsIds; private Journal _panoramaPublic; @@ -56,6 +59,11 @@ public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, b } public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, Journal panoramaPublic, List experimentAnnotationsIds, boolean test) + { + this(info, root, panoramaPublic, experimentAnnotationsIds, test, false); + } + + public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, Journal panoramaPublic, 
List experimentAnnotationsIds, boolean test, boolean forcePublicationCheck) { super("Panorama Public", info, root); setLogFile(root.getRootFileLike().toNioPathForWrite().resolve(FileUtil.makeFileNameWithTimestamp("PanoramaPublic-private-data-reminder", "log"))); @@ -63,6 +71,7 @@ public PrivateDataReminderJob(ViewBackgroundInfo info, @NotNull PipeRoot root, J _experimentAnnotationsIds = experimentAnnotationsIds; _test = test; + _forcePublicationCheck = forcePublicationCheck; } private static Journal getPanoramaPublic() @@ -163,6 +172,104 @@ public static ReminderDecision skip(String reason) { public boolean shouldPost() { return shouldPost; } public String getReason() { return reason; } } + + /** + * Checks for publications associated with the experiment if enabled in settings + * @param expAnnotations The experiment to check + * @param settings The reminder settings + * @param forceCheck Force publication check regardless of global setting + * @param log Logger for diagnostic messages + * @return NcbiArticleMatch with search result + */ + private PublicationMatch searchForPublication(@NotNull ExperimentAnnotations expAnnotations, + @NotNull PrivateDataReminderSettings settings, + boolean forceCheck, + @NotNull User user, + boolean testMode, + @NotNull Logger log) + { + // Check if publication checking is enabled (either globally or forced for this run) + if (!forceCheck && !settings.isEnablePublicationSearch()) + { + log.debug("Publication checking is disabled in settings"); + return null; + } + + // Get existing DatasetStatus to check cached results and user dismissals + DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(expAnnotations); + if (datasetStatus != null) + { + // If user has dismissed the publication suggestion, check search delay + Date dismissedDate = datasetStatus.getUserDismissedPublication(); + if (dismissedDate != null) + { + if (settings.isPublicationDismissalRecent(datasetStatus)) + { + log.info(String.format("User dismissed 
publication for experiment %d on %s; Publication search deferred (%d months)", + expAnnotations.getId(), dismissedDate, settings.getPublicationSearchFrequency())); + return null; + } + + // Search deferral expired — re-search NCBI + log.info(String.format("Search deferral expired for experiment %d (dismissed %s); re-searching NCBI", + expAnnotations.getId(), dismissedDate)); + try + { + PublicationMatch newMatch = NcbiPublicationSearchService.get().searchForPublication(expAnnotations, log); + if (newMatch != null && !newMatch.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) + { + // Different publication found — return it (caller will save and notify) + log.info(String.format("New publication %s found for experiment %d (previously dismissed %s)", + newMatch.getPublicationId(), expAnnotations.getId(), datasetStatus.getPotentialPublicationId())); + return newMatch; + } + else + { + // Same publication or nothing found — update dismissal date to restart search deferral + if (testMode) + { + log.info(String.format("TEST MODE: No new publication for experiment %d; Would reset search deferral", expAnnotations.getId())); + } + else + { + log.info(String.format("No new publication for experiment %d; resetting search deferral", + expAnnotations.getId())); + datasetStatus.setUserDismissedPublication(new Date()); + DatasetStatusManager.update(datasetStatus, user); + } + return null; + } + } + catch (Exception e) + { + log.error(String.format("Error re-searching publication for experiment %d: %s", + expAnnotations.getId(), e.getMessage()), e); + return null; + } + } + + // If we already have a cached publication ID, use it + if (!StringUtils.isBlank(datasetStatus.getPotentialPublicationId())) + { + log.info(String.format("Using cached publication %s %s for experiment %d", + datasetStatus.getPublicationType(), datasetStatus.getPotentialPublicationId(), expAnnotations.getId())); + return PublicationMatch.fromDatasetStatus(datasetStatus); + } + } + + // 
Perform the publication search + log.info(String.format("Searching for publications for experiment %d", expAnnotations.getId())); + try + { + return NcbiPublicationSearchService.get().searchForPublication(expAnnotations, log); + } + catch (Exception e) + { + log.error(String.format("Error searching for publication for experiment %d: %s", + expAnnotations.getId(), e.getMessage()), e); + return null; + } + } @Override public void run() @@ -207,17 +314,21 @@ private void processExperiments(List expAnnotationIds, ProcessingContex log.info(String.format("Posting reminder message to: %d message threads.", expAnnotationIds.size())); Set exptIds = new HashSet<>(expAnnotationIds); - try (DbScope.Transaction transaction = PanoramaPublicManager.getSchema().getScope().ensureTransaction()) + if (_test) + { + log.info("RUNNING IN TEST MODE - MESSAGES WILL NOT BE POSTED."); + } + for (Integer experimentAnnotationsId : exptIds) { - if (_test) + try (DbScope.Transaction transaction = PanoramaPublicManager.getSchema().getScope().ensureTransaction()) { - log.info("RUNNING IN TEST MODE - MESSAGES WILL NOT BE POSTED."); + processExperiment(experimentAnnotationsId, context, processingResults); + transaction.commit(); } - for (Integer experimentAnnotationsId : exptIds) + catch (Exception e) { - processExperiment(experimentAnnotationsId, context, processingResults); + log.error(String.format("Error processing experiment %d: %s", experimentAnnotationsId, e.getMessage()), e); } - transaction.commit(); } processingResults.logResults(log); @@ -265,18 +376,22 @@ private void processExperiment(Integer experimentAnnotationsId, ProcessingContex return; } + // Check for publications if enabled + PublicationMatch publicationResult = searchForPublication(expAnnotations, context.getSettings(), _forcePublicationCheck, getUser(), context.isTestMode(), processingResults._log); + if (!context.isTestMode()) { - postReminderMessage(expAnnotations, submission, announcement, submitter, context); + 
postReminderMessage(expAnnotations, submission, announcement, submitter, publicationResult, context); - updateDatasetStatus(expAnnotations); + updateDatasetStatus(expAnnotations, publicationResult); } processingResults.addProcessed(expAnnotations, announcement); } private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSubmission submission, - Announcement announcement, User submitter, ProcessingContext context) + Announcement announcement, User submitter, @Nullable PublicationMatch publicationResult, + ProcessingContext context) { // Older message threads, pre March 2023, will not have the submitter or lab head on the notify list. Add them. List notifyList = new ArrayList<>(); @@ -294,11 +409,12 @@ private void postReminderMessage(ExperimentAnnotations expAnnotations, JournalSu notifyList, announcement, context.getAnnouncementsFolder(), - context.getJournalAdmin() + context.getJournalAdmin(), + publicationResult ); } - private void updateDatasetStatus(ExperimentAnnotations expAnnotations) + private void updateDatasetStatus(ExperimentAnnotations expAnnotations, @Nullable PublicationMatch publicationResult) { DatasetStatus datasetStatus = DatasetStatusManager.getForExperiment(expAnnotations); if (datasetStatus == null) @@ -306,11 +422,36 @@ private void updateDatasetStatus(ExperimentAnnotations expAnnotations) datasetStatus = new DatasetStatus(); datasetStatus.setExperimentAnnotationsId(expAnnotations.getId()); datasetStatus.setLastReminderDate(new Date()); + + // Save publication search results if found + if (publicationResult != null) + { + datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); + datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); + datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); + datasetStatus.setCitation(publicationResult.getCitation()); + } + DatasetStatusManager.save(datasetStatus, getUser()); } else { datasetStatus.setLastReminderDate(new 
Date()); + + // Update publication search results + if (publicationResult != null) + { + // If this is a new/different publication, update it and clear dismissal + if (!publicationResult.getPublicationId().equals(datasetStatus.getPotentialPublicationId())) + { + datasetStatus.setPotentialPublicationId(publicationResult.getPublicationId()); + datasetStatus.setPublicationType(publicationResult.getPublicationType().name()); + datasetStatus.setPublicationMatchInfo(publicationResult.getMatchInfo()); + datasetStatus.setCitation(publicationResult.getCitation()); + datasetStatus.setUserDismissedPublication(null); // Clear dismissal for new publication + } + } + DatasetStatusManager.update(datasetStatus, getUser()); } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java b/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java index c599dd9f..bb8355e8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java +++ b/panoramapublic/src/org/labkey/panoramapublic/proteomexchange/NcbiUtils.java @@ -15,14 +15,10 @@ */ package org.labkey.panoramapublic.proteomexchange; -import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.Logger; -import org.json.JSONException; import org.json.JSONObject; import org.labkey.api.collections.IntHashMap; import org.labkey.api.util.PageFlowUtil; -import org.labkey.api.util.Pair; import org.labkey.api.util.logging.LogHelper; import org.labkey.api.view.AjaxCompletion; import org.w3c.dom.CharacterData; @@ -37,27 +33,27 @@ import javax.xml.parsers.ParserConfigurationException; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import 
java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.logging.log4j.Logger; + +/** + * Utilities for NCBI taxonomy lookups (autocomplete and scientific name resolution). + */ public class NcbiUtils { private static final Pattern pattern = Pattern.compile("new Array\\(\"(.*)\"\\),"); private static final String eutilsUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy"; private static final String autoCompUrl = "https://blast.ncbi.nlm.nih.gov/portal/utils/autocomp.fcgi?dict=taxids_sg&q="; - private static final String citationEporterUrl = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id="; - public static final String PUBMED_ID = "^[0-9]{1,8}$"; // https://libguides.library.arizona.edu/c.php?g=406096&p=2779570 - private static final Logger LOG = LogHelper.getLogger(NcbiUtils.class, "Messages about using the NCBI utilities"); public static List getCompletions(String token) throws PxException @@ -181,78 +177,4 @@ public static Map getScientificNames(List taxIds) thro } return sciNameMap; } - - /** - * Does a citation lookup for the given PubMedId using the NCBI's Literature Citation Exporter (https://api.ncbi.nlm.nih.gov/lit/ctxp). 
- * Example URL: https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id=28691345 - * @param pubmedId PubMed Id - * @return the PubMed link and the NLM-style citation if the lookup is successful - */ - public static Pair getLinkAndCitation (String pubmedId) - { - if (pubmedId == null || !pubmedId.matches(PUBMED_ID)) - { - return null; - } - - String queryUrl = citationEporterUrl + pubmedId; - - HttpURLConnection conn = null; - try - { - URL url = new URL(queryUrl); - conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - - int status = conn.getResponseCode(); - - if (status == HttpURLConnection.HTTP_OK) - { - String response; - try (InputStream in = conn.getInputStream()) - { - response = IOUtils.toString(in, StandardCharsets.UTF_8); - } - String citation = getCitation(response, pubmedId); - - return citation != null ? new Pair<>(getPubmedLink(pubmedId), citation) : null; - } - } - catch (IOException e) - { - LOG.error("Error submitting a request to NCBI Literature Citation Exporter. URL: " + queryUrl, e); - } - finally - { - if(conn != null) conn.disconnect(); - } - return null; - } - - private static String getPubmedLink(String pubmedId) - { - // Example: https://pubmed.ncbi.nlm.nih.gov/29331002 - return "https://pubmed.ncbi.nlm.nih.gov/" + pubmedId; - } - - private static String getCitation(String response, String pubmedId) - { - try - { - var jsonObject = new JSONObject(response); - /* We are interested in the NLM style citation. - nlm: { - orig: "ENCODE Project Consortium. An integrated encyclopedia of DNA elements in the human genome. Nature. 2012 Sep 6;489(7414):57-74. doi: 10.1038/nature11247. PMID: 22955616; PMCID: PMC3439153.", - format: "ENCODE Project Consortium. An integrated encyclopedia of DNA elements in the human genome. Nature. 2012 Sep 6;489(7414):57-74. doi: 10.1038/nature11247. PMID: 22955616; PMCID: PMC3439153." - } - */ - var nlmInfo = jsonObject.optJSONObject("nlm"); - return null != nlmInfo ? 
nlmInfo.getString("orig") : null; - } - catch (JSONException e) - { - LOG.error("Error parsing response from NCBI Literature Citation Exporter for pubmedID " + pubmedId, e); - } - return null; - } } diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java b/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java index b0cb1624..d0dc81e8 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java +++ b/panoramapublic/src/org/labkey/panoramapublic/view/expannotations/TargetedMSExperimentWebPart.java @@ -86,6 +86,7 @@ else if(expAnnotations.getContainer().equals(container)) navTree.addChild("DOI", new ActionURL(PanoramaPublicController.DoiOptionsAction.class, container).addParameter("id", expAnnotations.getId())); navTree.addChild("Make Data Public", new ActionURL(PanoramaPublicController.MakePublicAction.class, container).addParameter("id", expAnnotations.getId())); navTree.addChild("Post to Bluesky", new ActionURL(PanoramaPublicController.PostToBlueskyOptionsAction.class, container).addParameter("id", expAnnotations.getId())); + navTree.addChild("Search Publications", new ActionURL(PanoramaPublicController.SearchPublicationsForDatasetAction.class, container).addParameter("id", expAnnotations.getId())); JournalSubmission submission = SubmissionManager.getSubmissionForExperiment(expAnnotations); if (submission != null) diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp index af474fb9..bac429a7 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/privateDataRemindersSettingsForm.jsp @@ -64,6 +64,23 @@ } window.location = LABKEY.ActionURL.buildURL("panoramapublic", 
"sendPrivateDataReminders.view", folderPath); } + + function clickSearchPublicationsLink() + { + const journal = document.getElementById("journal"); + if (!journal) + { + alert("Cannot get journal selector element."); + return; + } + const folderPath = journal.value; + if (!folderPath) + { + alert("Please select a Panorama Public folder first."); + return; + } + window.location = LABKEY.ActionURL.buildURL("panoramapublic", "searchPublications.view", folderPath); + } @@ -127,7 +144,33 @@ - + + + <%=h(PrivateDataReminderSettings.PROP_ENABLE_PUBLICATION_SEARCH)%> + + + /> +
+ When enabled, the system will search PubMed Central and PubMed for publications associated with private datasets. +
+ If a publication is found, submitters will be notified. +
+ + + + + <%=h(PrivateDataReminderSettings.PROP_PUBLICATION_SEARCH_FREQUENCY)%> + + + +
+ Number of months to wait after a user dismisses a publication suggestion before re-searching. +
+ If a different publication is found after this delay, the submitter will be notified again. +
+ + + <%=button("Save").submit(true)%> <%=button("Cancel").href(panoramaPublicAdminUrl)%> @@ -148,6 +191,7 @@ %> <%=link("Send Reminders Now").onClick("clickSendRemindersLink();").build()%> + <%=link("Search Publications").onClick("clickSearchPublicationsLink();").build()%>
diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp new file mode 100644 index 00000000..6f01ceae --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForDataset.jsp @@ -0,0 +1,120 @@ +<%@ taglib prefix="labkey" uri="http://www.labkey.org/taglib" %> +<%@ page import="org.labkey.api.view.HttpView" %> +<%@ page import="org.labkey.api.view.JspView" %> +<%@ page import="org.labkey.api.view.ActionURL" %> +<%@ page import="org.labkey.api.util.DateUtil" %> +<%@ page import="org.labkey.panoramapublic.PanoramaPublicController" %> +<%@ page import="org.labkey.panoramapublic.PanoramaPublicController.SearchPublicationsForDatasetBean" %> +<%@ page import="org.labkey.panoramapublic.ncbi.PublicationMatch" %> +<%@ page import="org.labkey.panoramapublic.model.ExperimentAnnotations" %> +<%@ page import="org.labkey.api.view.ShortURLRecord" %> +<%@ page import="org.labkey.panoramapublic.proteomexchange.ProteomeXchangeService" %> +<%@ page extends="org.labkey.api.jsp.JspBase" %> + +<% + JspView view = HttpView.currentView(); + SearchPublicationsForDatasetBean bean = view.getModelBean(); + ExperimentAnnotations experiment = bean.getExperimentAnnotations(); + ShortURLRecord shortUrl = experiment.getShortUrl(); + String submitterName = experiment.getSubmitterName(); + ActionURL postUrl = new ActionURL(PanoramaPublicController.NotifySubmitterOfPublicationAction.class, getContainer()); +%> + + + + + + + + + + + + + + + + + + <% if (experiment.hasPxid()) { %> + + + + + <% } %> + <% if (experiment.hasDoi()) { %> + + + + + <% } %> +
Title: + <% if (shortUrl != null) { %> + <%=h(experiment.getTitle())%> + <% } else { %> + <%=h(experiment.getTitle())%> + <% } %> +
Created:<%=h(DateUtil.formatDateTime(experiment.getCreated(), "yyyy-MM-dd"))%>
Submitter:<%=h(submitterName != null ? submitterName : "Unknown")%>
PX ID:<%=simpleLink(experiment.getPxid(), ProteomeXchangeService.toUrl(experiment.getPxid()))%>
DOI:<%=h(experiment.getDoi())%>
+ +<% if (!bean.getMatches().isEmpty()) { %> + + + + + + + + + + + + <% if (bean.isShowDismissedColumn()) { %> + + <% } %> + + <% + int rowIdx = 0; + for (PublicationMatch match : bean.getMatches()) + { + String rowCls = (rowIdx++) % 2 == 0 ? "labkey-alternate-row" : "labkey-row"; + String pubId = match.getPublicationId(); + %> + + + + + <% if (bean.isShowDismissedColumn()) { %> + + <% } %> + + <% } %> +
SelectPublicationMatchesUser Dismissed
+ + + <%=simpleLink(match.getCitation() != null ? match.getCitation() : match.getPublicationIdLabel(), match.getPublicationUrl()).target("_blank")%> + <%=h(match.getMatchInfo())%><%=h(pubId.equals(bean.getDismissedPubId()) ? "Yes" : "")%>
+
+ <%=button("Notify Submitter").submit(true)%> +
+ +<% } else { %> + +
No publications found for this dataset.
+ +<% } %> diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp new file mode 100644 index 00000000..81721758 --- /dev/null +++ b/panoramapublic/src/org/labkey/panoramapublic/view/searchPublicationsForm.jsp @@ -0,0 +1,253 @@ +<%@ taglib prefix="labkey" uri="http://www.labkey.org/taglib" %> +<%@ page import="org.labkey.api.view.HttpView" %> +<%@ page import="org.labkey.api.view.JspView" %> +<%@ page import="org.labkey.panoramapublic.PanoramaPublicController" %> +<%@ page import="org.labkey.api.view.ActionURL" %> +<%@ page extends="org.labkey.api.jsp.JspBase" %> + +<% + JspView view = HttpView.currentView(); + var bean = view.getModelBean(); +%> + + + + + + + +
+
+ Select datasets below, then click "Search Publications" to search PubMed and PMC for associated publications. +
+
+ <%=button("Search Publications").id("search-pub-btn").onClick("searchPublications();")%> +
+
+ +
diff --git a/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp b/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp index 5708b3c5..01f45e18 100644 --- a/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp +++ b/panoramapublic/src/org/labkey/panoramapublic/view/sendPrivateDataRemindersForm.jsp @@ -71,7 +71,16 @@ /> - <%=button("Post Reminders").onClick("submitForm();")%> + + Search for Publications: + + /> +
+ When checked, the system will search for publications before sending reminders. +
+ + + <%=button("Post Reminders").onClick("submitForm();")%>
diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java index 9dc6eea8..1af9f4c4 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PanoramaPublicBaseTest.java @@ -37,7 +37,9 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -223,10 +225,15 @@ void setupSubfolder(String projectName, String parentFolderName, String folderNa } void updateSubmitterAccountInfo(String lastName) + { + updateSubmitterAccountInfo(lastName, null); + } + + void updateSubmitterAccountInfo(String lastName, String firstName) { goToMyAccount(); clickButton("Edit"); - setFormElement(Locator.name("quf_FirstName"), "Submitter"); + setFormElement(Locator.name("quf_FirstName"), firstName != null ? 
firstName : "Submitter"); setFormElement(Locator.name("quf_LastName"), lastName); clickButton("Submit"); } @@ -490,20 +497,27 @@ public File getSampleDataPath(String file) return TestFileUtils.getSampleData("TargetedMS/" + getSampleDataFolder() + file); } - protected String setupFolderSubmitAndCopy(String projectName, String folderName, String targetFolder, String experimentTitle, String submitter, @Nullable String submitterLastName, + protected String setupFolderSubmitAndCopy(String projectName, String folderName, String targetFolder, String experimentTitle, String submitterEmail, @Nullable String submitterLastName, @Nullable String admin, String skylineDocName) { - setupSourceFolder(projectName, folderName, submitter); + return setupFolderSubmitAndCopy(projectName, folderName, targetFolder, experimentTitle, submitterEmail, submitterLastName, null, admin, skylineDocName); + } + + protected String setupFolderSubmitAndCopy(String projectName, String folderName, String targetFolder, String experimentTitle, String submitterEmail, + @Nullable String submitterLastName, @Nullable String submitterFirstName, + @Nullable String admin, String skylineDocName) + { + setupSourceFolder(projectName, folderName, submitterEmail); if (admin != null) { createFolderAdmin(projectName, folderName, admin); } - impersonate(submitter); + impersonate(submitterEmail); if (submitterLastName != null) { - updateSubmitterAccountInfo(submitterLastName); + updateSubmitterAccountInfo(submitterLastName, submitterFirstName); } // Import a Skyline document to the folder @@ -603,6 +617,66 @@ protected void verifyIsPublicColumn(String panoramaPublicProject, String experim assertEquals(isPublic ? "Yes" : "No", expListTable.getDataAsText(0, "Public")); } + /** + * Navigate to the Private Data Reminder Settings page and read the current form values. + * Returns a map with keys: extensionLength, delayUntilFirstReminder, reminderFrequency, enablePublicationSearch, publicationSearchFrequency. 
+ */ + protected Map getPrivateDataReminderSettings() + { + goToAdminConsole().goToSettingsSection(); + clickAndWait(Locator.linkWithText("Panorama Public")); + clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); + + Map settings = new HashMap<>(); + settings.put("extensionLength", getFormElement(Locator.input("extensionLength"))); + settings.put("delayUntilFirstReminder", getFormElement(Locator.input("delayUntilFirstReminder"))); + settings.put("reminderFrequency", getFormElement(Locator.input("reminderFrequency"))); + settings.put("enablePublicationSearch", String.valueOf(Locator.checkboxByName("enablePublicationSearch").findElement(getDriver()).isSelected())); + settings.put("publicationSearchFrequency", getFormElement(Locator.input("publicationSearchFrequency"))); + return settings; + } + + protected void savePrivateDataReminderSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency) + { + savePrivateDataReminderSettings(extensionLength, delayUntilFirstReminder, reminderFrequency, false); + } + + protected void savePrivateDataReminderSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency, boolean enablePublicationSearch) + { + goToAdminConsole().goToSettingsSection(); + clickAndWait(Locator.linkWithText("Panorama Public")); + clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); + + setFormElement(Locator.input("delayUntilFirstReminder"), delayUntilFirstReminder); + setFormElement(Locator.input("reminderFrequency"), reminderFrequency); + setFormElement(Locator.input("extensionLength"), extensionLength); + if (enablePublicationSearch) + { + checkCheckbox(Locator.checkboxByName("enablePublicationSearch")); + } + else + { + uncheckCheckbox(Locator.checkboxByName("enablePublicationSearch")); + } + clickButton("Save", 0); + waitForText("Private data reminder settings saved"); + clickAndWait(Locator.linkWithText("Back to Private Data Reminder Settings")); + + 
assertEquals(String.valueOf(delayUntilFirstReminder), getFormElement(Locator.input("delayUntilFirstReminder"))); + assertEquals(String.valueOf(reminderFrequency), getFormElement(Locator.input("reminderFrequency"))); + assertEquals(String.valueOf(extensionLength), getFormElement(Locator.input("extensionLength"))); + } + + protected void goToSendRemindersPage(String projectName) + { + goToAdminConsole().goToSettingsSection(); + clickAndWait(Locator.linkWithText("Panorama Public")); + clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); + selectOptionByText(Locator.name("journal"), projectName); + clickAndWait(Locator.linkWithText("Send Reminders Now")); + waitForText(projectName, "A reminder message will be sent to the submitters of the selected experiments"); + } + @Override protected void doCleanup(boolean afterTest) throws TestTimeoutException { diff --git a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java index 0ead1b8a..50e34e25 100644 --- a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PrivateDataReminderTest.java @@ -129,7 +129,7 @@ private void testSendingReminders(String projectName, List dataF assertEquals(2, privateDataCount); log("Changing reminder settings. Setting reminder frequency to 0."); - saveSettings("2", "12", "0"); + savePrivateDataReminderSettings("2", "12", "0"); // Do not select any experiments. Job will not run. log("Attempt to send reminders without selecting any experiments. Job should not run."); @@ -145,7 +145,7 @@ private void testSendingReminders(String projectName, List dataF verifyPipelineJobLogMessage(projectName, message, message2, "Skipped posting reminders for 2 experiments"); log("Changing reminder settings. 
Setting delay until first reminder to 0."); - saveSettings("2", "0", "0"); + savePrivateDataReminderSettings("2", "0", "0"); // Post reminders in test mode. log("Posting reminders in test mode. Select all experiment rows."); @@ -161,7 +161,7 @@ private void testSendingReminders(String projectName, List dataF // Change the reminder frequency to 1. log("Changing reminder settings. Setting reminder frequency to 1."); - saveSettings("2", "0", "1"); + savePrivateDataReminderSettings("2", "0", "1"); // Post reminders again. Since reminder frequency is set to 1, no reminders will be posted to the first data. postReminders(projectName, false, privateDataCount, -1, ++pipelineJobCount); verifyReminderPosted(projectName, privateData.get(0), 1); // No new reminders since reminder frequency is set to 1. @@ -171,7 +171,7 @@ private void testSendingReminders(String projectName, List dataF // Change reminder frequency to 0 again. log("Changing reminder settings. Setting reminder frequency to 0."); - saveSettings("2", "0", "0"); + savePrivateDataReminderSettings("2", "0", "0"); // Request extension for the first experiment. log("Requesting extension for experiment Id " + privateData.get(0).getExperimentAnnotationsId()); @@ -186,6 +186,7 @@ private void testSendingReminders(String projectName, List dataF // Request deletion for the second experiment. log("Requesting deletion for experiment Id " + privateData.get(1).getExperimentAnnotationsId()); requestDeletion(projectName, privateData.get(1)); + // Post reminders again - none should be posted postReminders(projectName, false, privateDataCount, -1, ++pipelineJobCount); verifyReminderPosted(projectName, privateData.get(0),1); // No new reminders since extension requested. 
@@ -291,24 +292,6 @@ private void verifyReminderPosted(String projectName, DataFolderInfo folderInfo, } } - private void saveSettings(String extensionLength, String delayUntilFirstReminder, String reminderFrequency) - { - goToAdminConsole().goToSettingsSection(); - clickAndWait(Locator.linkWithText("Panorama Public")); - clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); - - setFormElement(Locator.input("delayUntilFirstReminder"), delayUntilFirstReminder); - setFormElement(Locator.input("reminderFrequency"), reminderFrequency); - setFormElement(Locator.input("extensionLength"), extensionLength); - clickButton("Save", 0); - waitForText("Private data reminder settings saved"); - clickAndWait(Locator.linkWithText("Back to Private Data Reminder Settings")); - - // clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); - assertEquals(String.valueOf(delayUntilFirstReminder), getFormElement(Locator.input("delayUntilFirstReminder"))); - assertEquals(String.valueOf(reminderFrequency), getFormElement(Locator.input("reminderFrequency"))); - assertEquals(String.valueOf(extensionLength), getFormElement(Locator.input("extensionLength"))); - } private void postRemindersNoExperimentsSelected(String projectName, int expectedExperimentCount) { @@ -373,15 +356,6 @@ private void postReminders(String projectName, boolean testMode, int expectedExp } } - private void goToSendRemindersPage(String projectName) - { - goToAdminConsole().goToSettingsSection(); - clickAndWait(Locator.linkWithText("Panorama Public")); - clickAndWait(Locator.linkWithText("Private Data Reminder Settings")); - selectOptionByText(Locator.name("journal"), projectName); - clickAndWait(Locator.linkWithText("Send Reminders Now")); - waitForText(projectName, "A reminder message will be sent to the submitters of the selected experiments"); - } @Override protected void doCleanup(boolean afterTest) throws TestTimeoutException diff --git 
a/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java new file mode 100644 index 00000000..93e7b3bd --- /dev/null +++ b/panoramapublic/test/src/org/labkey/test/tests/panoramapublic/PublicationSearchTest.java @@ -0,0 +1,484 @@ +package org.labkey.test.tests.panoramapublic; + +import org.junit.After; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.labkey.remoteapi.CommandException; +import org.labkey.remoteapi.Connection; +import org.labkey.remoteapi.SimpleGetCommand; +import org.labkey.remoteapi.query.Filter; +import org.labkey.remoteapi.query.SelectRowsCommand; +import org.labkey.remoteapi.query.SelectRowsResponse; +import org.labkey.test.BaseWebDriverTest; +import org.labkey.test.Locator; +import org.labkey.test.TestProperties; +import org.labkey.test.TestTimeoutException; +import org.labkey.test.WebTestHelper; +import org.labkey.test.categories.External; +import org.labkey.test.categories.MacCossLabModules; +import org.labkey.test.util.ApiPermissionsHelper; +import org.labkey.test.util.DataRegionTable; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.labkey.test.util.PermissionsHelper.READER_ROLE; + +@Category({External.class, MacCossLabModules.class}) +@BaseWebDriverTest.ClassTimeout(minutes = 7) +public class PublicationSearchTest extends PanoramaPublicBaseTest +{ + private static final String SKY_FILE = "MRMer.zip"; + + private static final String SUBMITTER_1 = "submitter1@panoramapublic.test"; + private static final String SUBMITTER_2 = "submitter2@panoramapublic.test"; + private static final String 
ADMIN_USER = "admin@panoramapublic.test"; + + // --- Dataset 1: Abbatiello et al. (Mol Cell Proteomics 2013) --- + // Found via PubMed fallback (author + title match). Paper does not reference PXD010535 in PMC. + private static final String FOLDER_1 = "System Suitability Study"; + private static final String TARGET_FOLDER_1 = "System Suitability Copy"; + private static final String TITLE_1 = "Design, implementation and multisite evaluation of a system suitability protocol " + + "for the quantitative assessment of instrument performance in liquid chromatography-multiple reaction monitoring-MS (LC-MRM-MS)"; + private static final String PXD_1 = "PXD010535"; + private static final String PMID_1 = "23689285"; + private static final String ARTICLE_TITLE_1 = TITLE_1; // Same as experiment title + private static final String CITATION_1 = "Abbatiello SE, Mani DR, Schilling B, Maclean B, Zimmerman LJ, " + + "Feng X, Cusack MP, Sedransk N, Hall SC, Addona T, Allen S, Dodder NG, Ghosh M, Held JM, Hedrick V, " + + "Inerowicz HD, Jackson A, Keshishian H, Kim JW, Lyssand JS, Riley CP, Rudnick P, Sadowski P, " + + "Shaddox K, Smith D, Tomazela D, Wahlander A, Waldemarson S, Whitwell CA, You J, Zhang S, " + + "Kinsinger CR, Mesri M, Rodriguez H, Borchers CH, Buck C, Fisher SJ, Gibson BW, Liebler D, " + + "Maccoss M, Neubert TA, Paulovich A, Regnier F, Skates SJ, Tempst P, Wang M, Carr SA. " + + "Design, implementation and multisite evaluation of a system suitability protocol for the quantitative " + + "assessment of instrument performance in liquid chromatography-multiple reaction monitoring-MS (LC-MRM-MS). " + + "Mol Cell Proteomics. 2013 Sep;12(9):2623-39. doi: 10.1074/mcp.M112.027078. Epub 2013 May 20. " + + "PMID: 23689285; PMCID: PMC3769335."; + + // --- Dataset 2: Wen et al. (Nat Commun 2025) --- + // Found via PMC search by PXD. PMC returns two results: Nature article and bioRxiv preprint. + // The preprint is filtered out by isPreprint(). 
+ private static final String FOLDER_2 = "Carafe"; + private static final String TARGET_FOLDER_2 = "Carafe Copy"; + private static final String TITLE_2 = "Carafe: a tool for in silico spectral library generation for DIA proteomics"; + private static final String PXD_2 = "PXD056793"; + private static final String PMID_2 = "41198693"; + private static final String PMC_ID_2 = "12592563"; + private static final String PMC_ID_2_PREPRINT = "11507862"; + private static final String ARTICLE_TITLE_2 = "Carafe enables high quality in silico spectral library generation " + + "for data-independent acquisition proteomics."; // Not identical to the experiment title but keywords match + private static final String CITATION_2 = "Wen B, Hsu C, Shteynberg D, Zeng WF, Riffle M, Chang A, Mudge MC, " + + "Nunn BL, MacLean BX, Berg MD, Villén J, MacCoss MJ, Noble WS. " + + "Carafe enables high quality in silico spectral library generation for data-independent acquisition proteomics. " + + "Nat Commun. 2025 Nov 6;16(1):9815. doi: 10.1038/s41467-025-64928-4. " + + "PMID: 41198693; PMCID: PMC12592563."; + + private boolean _useMockNcbi = false; + private Map _originalReminderSettings = null; + + /** + * Tests the publication search flow with two datasets: + * Dataset 1 (Abbatiello et al.): search → notify → dismiss → verify not suggested again + * Dataset 2 (Wen et al.): verify publication found via pipeline job reminders + * Uses mock NCBI service when running on TeamCity. 
+ */ + @Test + public void testPublicationSearchAndDismiss() + { + String panoramaPublicProject = PANORAMA_PUBLIC; + goToProjectHome(panoramaPublicProject); + ApiPermissionsHelper permissionsHelper = new ApiPermissionsHelper(this); + permissionsHelper.setSiteGroupPermissions("Guests", READER_ROLE); + + // Step 1: Set up mock NCBI service if running on TeamCity + setupMockNcbiService(); + + // Step 2: Create dataset 1 folder, submit to Panorama Public, and copy + String testProject = getProjectName(); + String shortAccessUrl1 = setupFolderSubmitAndCopy(testProject, FOLDER_1, TARGET_FOLDER_1, + TITLE_1, SUBMITTER_1, "Schilling", "Birgit", ADMIN_USER, SKY_FILE); + int exptId1 = getExperimentId(panoramaPublicProject, TARGET_FOLDER_1); + assignPxdId(panoramaPublicProject, TARGET_FOLDER_1, exptId1, PXD_1); + + // Step 3: Create dataset 2 folder, submit to Panorama Public, and copy + String shortAccessUrl2 = setupFolderSubmitAndCopy(testProject, FOLDER_2, TARGET_FOLDER_2, + TITLE_2, SUBMITTER_2, "Wen", "Bo", ADMIN_USER, SKY_FILE); + int exptId2 = getExperimentId(panoramaPublicProject, TARGET_FOLDER_2); + assignPxdId(panoramaPublicProject, TARGET_FOLDER_2, exptId2, PXD_2); + + // Step 4: Search publications for dataset 1 — should find PMID 23689285 via PubMed fallback + searchPublicationsForDataset(panoramaPublicProject, TARGET_FOLDER_1, exptId1); + assertTextPresent(PMID_1); + assertTextPresent("Author, Title"); + assertTextNotPresent("ProteomeXchange ID"); // Paper does not contain the PXD + + // Step 5: Select the publication and notify the submitter + click(Locator.radioButtonByNameAndValue("publicationId", PMID_1)); + clickButton("Notify Submitter"); + + // Step 6: Verify the reminder message was posted in the support thread + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + waitForText("Submitted - " + shortAccessUrl1); + assertTextPresent("Action Required: Publication Found for Your Data on Panorama Public"); + assertTextPresent("We found a paper that 
appears to be associated with your private data on Panorama Public"); + assertTextPresent("Abbatiello SE, Mani DR, Schilling B"); + assertElementPresent(Locator.linkWithHref("https://pubmed.ncbi.nlm.nih.gov/" + PMID_1)); + + // Step 6a: Verify DatasetStatus was updated for dataset 1 after notifying submitter + Map dsStatus1 = getDatasetStatus(panoramaPublicProject, TARGET_FOLDER_1, exptId1); + assertNotNull("Expected DatasetStatus row for dataset 1 after notification", dsStatus1); + assertNotNull("Expected lastReminderDate to be set after notification", dsStatus1.get("LastReminderDate")); + assertEquals("Expected potentialPublicationId to be set", PMID_1, dsStatus1.get("PotentialPublicationId")); + assertEquals("Expected publicationType to be PubMed", "PubMed", dsStatus1.get("PublicationType")); + assertNotNull("Expected citation to be cached", dsStatus1.get("Citation")); + + // Verify DatasetStatus does NOT exist for dataset 2 yet (no notification sent) + Map dsStatus2Before = getDatasetStatus(panoramaPublicProject, TARGET_FOLDER_2, exptId2); + assertNull("Expected no DatasetStatus row for dataset 2 before reminders", dsStatus2Before); + + // Step 7: Impersonate submitter, dismiss the publication suggestion + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + impersonate(SUBMITTER_1); + waitForText("Submitted - " + shortAccessUrl1); + click(Locator.linkWithText("Dismiss Publication Suggestion")); + waitForText("You are dismissing the publication suggestion for your data on Panorama Public"); + assertTextPresent(shortAccessUrl1); + clickButton("OK", 0); + waitForText("The publication suggestion has been dismissed"); + stopImpersonating(); + + // Verify the dismissal notification was posted in the support thread + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + waitForText("Submitted - " + shortAccessUrl1); + assertTextPresentInThisOrder( + "Publication Suggestion Dismissed", + "Thank you for letting us know that the suggested paper is not 
associated with your data on Panorama Public"); + + // Step 8: Search publications for dataset 1 again and verify the dismissed publication is flagged + searchPublicationsForDataset(panoramaPublicProject, TARGET_FOLDER_1, exptId1); + assertTextPresent(PMID_1); + assertTextPresent("User Dismissed"); + + // Verify the dismissed cell contains "Yes" + Locator dismissedCell = Locator.xpath("//tr[.//input[@value='" + PMID_1 + "']]/td[last()]"); + assertEquals("Yes", getText(dismissedCell)); + + // Verify that we cannot notify the submitter for a dismissed publication + click(Locator.radioButtonByNameAndValue("publicationId", PMID_1)); + clickButton("Notify Submitter"); + assertTextPresent("The user has already dismissed the publication suggestion PubMed ID " + PMID_1 + " for this dataset"); + + // Step 9: Run reminders in TEST MODE — verify DatasetStatus is NOT updated + // Save current settings so they can be restored after the test (see resetAfterTest) + _originalReminderSettings = getPrivateDataReminderSettings(); + savePrivateDataReminderSettings("2", "0", "0", true); + + // Post reminders in test mode with publication search enabled + goToSendRemindersPage(panoramaPublicProject); + DataRegionTable table = new DataRegionTable("ExperimentAnnotations", getDriver()); + table.checkAllOnPage(); + assertChecked(Locator.checkboxByName("searchPublications")); + checkCheckbox(Locator.checkboxByName("testMode")); + clickButton("Post Reminders", 0); + waitForPipelineJobsToComplete(1, "Post private data reminder messages", false); + + // Verify test mode was logged + goToProjectHome(panoramaPublicProject); + goToDataPipeline(); + goToDataPipeline().clickStatusLink(0); + assertTextPresent("RUNNING IN TEST MODE - MESSAGES WILL NOT BE POSTED."); + + // Verify DatasetStatus for dataset 2 was NOT updated in test mode + Map dsStatus2TestMode = getDatasetStatus(panoramaPublicProject, TARGET_FOLDER_2, exptId2); + assertNull("Expected no DatasetStatus row for dataset 2 after test-mode run", dsStatus2TestMode); 
+ + // Step 10: Run reminders in ACTUAL mode and verify the following: + // 1. pipeline job skips searching for publication for dataset 1 + // 2. message posted for dataset 1 is about reminding user to make data public rather than about a publication + // 3. a publication is found for dataset 2 and message posted to the message thread + // 4. verify DatasetStatus for dataset 2 IS updated + goToSendRemindersPage(panoramaPublicProject); + table = new DataRegionTable("ExperimentAnnotations", getDriver()); + table.checkAllOnPage(); + assertChecked(Locator.checkboxByName("searchPublications")); + clickButton("Post Reminders", 0); + waitForPipelineJobsToComplete(2, "Post private data reminder messages", false); + + // Verify the pipeline job log contains the skip message for the dismissed dataset 1 publication + goToProjectHome(panoramaPublicProject); + goToDataPipeline(); + goToDataPipeline().clickStatusLink(0); + String skipMessage = String.format("User dismissed publication for experiment %d", exptId1); + assertTextPresent(skipMessage); + assertTextPresent("Publication search deferred"); + + // Verify that the reminder for the dismissed dataset 1 is about making data public, not about a publication. + // The message thread already has a "Publication Found" message from Step 6 (before dismissal), + // so we check that the pipeline job posted a "Status Update" message that will appear after the "Publication Found" message. 
+ goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_1); + waitForText("Submitted - " + shortAccessUrl1); + assertTextPresentInThisOrder("Action Required: Publication Found for Your Data on Panorama Public", + "Action Required: Status Update for Your Private Data on Panorama Public"); + + // Verify that the pipeline job found a publication for dataset 2 and posted a message + goToSupportMessages(panoramaPublicProject, TARGET_FOLDER_2); + waitForText("Submitted - " + shortAccessUrl2); + assertTextPresent("Action Required: Publication Found for Your Data on Panorama Public"); + assertTextPresent("We found a paper that appears to be associated with your private data on Panorama Public"); + assertTextPresent("Wen B, Hsu C, Shteynberg D"); + assertElementPresent(Locator.linkWithHref("https://pubmed.ncbi.nlm.nih.gov/" + PMID_2)); + + // Verify DatasetStatus for dataset 2 was updated after actual posting + Map dsStatus2AfterPost = getDatasetStatus(panoramaPublicProject, TARGET_FOLDER_2, exptId2); + assertNotNull("Expected DatasetStatus row for dataset 2 after actual posting", dsStatus2AfterPost); + assertEquals("Expected potentialPublicationId for dataset 2", PMID_2, dsStatus2AfterPost.get("PotentialPublicationId")); + assertNotNull("Expected publicationType for dataset 2", dsStatus2AfterPost.get("PublicationType")); + assertNotNull("Expected lastReminderDate for dataset 2", dsStatus2AfterPost.get("LastReminderDate")); + assertNotNull("Expected citation to be cached for dataset 2", dsStatus2AfterPost.get("Citation")); + } + + /* + * Navigate to the Panorama Public copy folder and get the experiment ID. + */ + private int getExperimentId(String panoramaPublicProject, String targetFolder) + { + goToProjectFolder(panoramaPublicProject, targetFolder); + goToExperimentDetailsPage(); + return Integer.parseInt(portalHelper.getUrlParam("id")); + } + + /* + * Assign a PXD ID to an experiment using the UpdatePxDetails page. 
+ * Cannot use UpdateRowsCommand because the ExperimentAnnotations table is not updatable via the HTTP-based APIs. + */ + private void assignPxdId(String projectName, String folderName, int exptId, String pxdId) + { + goToProjectFolder(projectName, folderName); + beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "updatePxDetails", Map.of("id", String.valueOf(exptId)))); + waitForText("Update ProteomeXchange Details"); + setFormElement(Locator.input("pxId"), pxdId); + clickButton("Update"); + log("Assigned PXD ID " + pxdId + " to experiment " + exptId); + } + + /* + * Navigate to the search publications page for a dataset. + */ + private void searchPublicationsForDataset(String panoramaPublicProject, String targetFolder, int exptId) + { + goToProjectFolder(panoramaPublicProject, targetFolder); + beginAt(WebTestHelper.buildURL("panoramapublic", getCurrentContainerPath(), + "searchPublicationsForDataset", Map.of("id", String.valueOf(exptId)))); + waitForText("Publications Matches for Dataset"); + } + + /* + * Navigate to the support messages for a dataset. + */ + private void goToSupportMessages(String panoramaPublicProject, String targetFolder) + { + goToProjectFolder(panoramaPublicProject, targetFolder); + portalHelper.clickWebpartMenuItem("Targeted MS Experiment", true, "Support Messages"); + } + + /* + * Query the DatasetStatus table for a given experiment ID. + * Returns null if no row exists, or a map of column name → value. 
+ */ + private Map getDatasetStatus(String panoramaPublicProject, String targetFolder, int exptId) + { + try + { + Connection connection = createDefaultConnection(); + SelectRowsCommand cmd = new SelectRowsCommand("panoramapublic", "DatasetStatus"); + cmd.addFilter(new Filter("ExperimentAnnotationsId", exptId)); + cmd.setColumns(List.of("ExperimentAnnotationsId", "PotentialPublicationId", "PublicationType", + "PublicationMatchInfo", "Citation", "LastReminderDate", "UserDismissedPublication")); + String containerPath = "/" + panoramaPublicProject + "/" + targetFolder; + SelectRowsResponse resp = cmd.execute(connection, containerPath); + List> rows = resp.getRows(); + if (rows.isEmpty()) + { + return null; + } + assertEquals("Expected at most one DatasetStatus row for experiment " + exptId, 1, rows.size()); + return rows.get(0); + } + catch (IOException | CommandException e) + { + fail("Failed to query DatasetStatus: " + e.getMessage()); + return null; + } + } + + /* + * On TeamCity: set up the mock NCBI service and register mock publication data. + * On dev machine: verify that NCBI is reachable; fail with a clear message if not. + */ + private void setupMockNcbiService() + { + boolean useMockNcbiService = TestProperties.isTestRunningOnTeamCity(); + if (useMockNcbiService) + { + initMockNcbiService(); + } + else + { + assertTrue("NCBI E-utilities API is not reachable. 
Check your network connection.", isNcbiReachable()); + log("Using real NCBI service"); + } + } + + private void initMockNcbiService() + { + try + { + Connection connection = createDefaultConnection(); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "setupMockNcbiService"); + command.execute(connection, "/"); + _useMockNcbi = true; + log("Using mock NCBI service"); + registerMockPublications(); + } + catch (IOException | CommandException e) + { + fail("Failed to set up mock NCBI service: " + e.getMessage()); + } + } + + private static boolean isNcbiReachable() + { + try + { + java.net.HttpURLConnection conn = (java.net.HttpURLConnection) + new java.net.URL("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi").openConnection(); + conn.setRequestMethod("GET"); + conn.setRequestProperty("User-Agent", "PanoramaPublic/1.0"); + conn.setConnectTimeout(5000); + conn.setReadTimeout(5000); + int responseCode = conn.getResponseCode(); + boolean reachable = responseCode >= 200 && responseCode < 300; + conn.disconnect(); + return reachable; + } + catch (IOException e) + { + return false; + } + } + + + /* + * Register mock publication data with the mock NCBI service. + */ + private void registerMockPublications() + { + // Dataset 1: PMID 23689285 (Abbatiello et al.) — found via PubMed fallback (author + title match) + // Paper does not reference PXD010535 in PMC. + registerMockPublication("pubmed", PMID_1, "Schilling", + null, ARTICLE_TITLE_1, + "Abbatiello SE,Mani DR,Schilling B,Maclean B,Zimmerman LJ,Carr SA", + "2013/09/01 00:00", "Mol Cell Proteomics", "Molecular & cellular proteomics : MCP", + CITATION_1); + + // Dataset 2: PMID 41198693 (Wen et al.) — found via PMC search by PXD ID + // PMC search for PXD056793 returns two articles: Nature + bioRxiv preprint. 
+ + // Nature article (published) — PMC ID 12592563, PMID 41198693 + registerMockPublication("pmc", PMC_ID_2, PXD_2, + PMID_2, ARTICLE_TITLE_2, + "Wen B,Hsu C,Shteynberg D,Zeng WF,Riffle M,Chang A,Mudge MC,Nunn BL,MacLean BX,Berg MD,Villén J,MacCoss MJ,Noble WS", + "2025/11/06 00:00", "Nat Commun", "Nature communications", + CITATION_2); + + // bioRxiv preprint — PMC ID 11507862, PMID 39463980 (will be filtered out by isPreprint) + registerMockPublication("pmc", PMC_ID_2_PREPRINT, PXD_2, + "39463980", ARTICLE_TITLE_2, + "Wen B,Hsu C,Shteynberg D,Zeng WF,Riffle M,Chang A,Mudge M,Nunn BL,MacLean BX,Berg MD,Villén J,MacCoss MJ,Noble WS", + "2025/08/04 00:00", "bioRxiv", "bioRxiv : the preprint server for biology", + null); + } + + /* + * Register a single mock article with the mock NCBI service via the RegisterMockPublicationAction API. + */ + private void registerMockPublication(String database, String id, String searchKey, + String pmid, String title, String authors, + String sortDate, String source, String journalFull, + String citation) + { + try + { + Connection connection = createDefaultConnection(); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "registerMockPublication"); + Map params = new HashMap<>(); + params.put("database", database); + params.put("id", id); + params.put("searchKey", searchKey); + params.put("title", title); + params.put("authors", authors); + params.put("pubDate", sortDate); + params.put("source", source); + params.put("journalFull", journalFull); + if (pmid != null) params.put("pmid", pmid); + if (citation != null) params.put("citation", citation); + command.setParameters(params); + command.execute(connection, "/"); + log("Registered mock publication: " + database + " " + id); + } + catch (IOException | CommandException e) + { + fail("Failed to register mock publication: " + e.getMessage()); + } + } + + /* + * Restore the real NCBI service. 
+ */ + private void restoreNcbiService() + { + try + { + Connection connection = createDefaultConnection(); + SimpleGetCommand command = new SimpleGetCommand("panoramapublic", "restoreNcbiService"); + command.execute(connection, "/"); + log("Restored real NCBI service"); + } + catch (IOException | CommandException e) + { + log("Warning: Failed to restore NCBI service: " + e.getMessage()); + } + } + + @After + public void resetAfterTest() + { + if (_useMockNcbi) + { + restoreNcbiService(); + } + + if (_originalReminderSettings != null) + { + savePrivateDataReminderSettings( + _originalReminderSettings.get("extensionLength"), + _originalReminderSettings.get("delayUntilFirstReminder"), + _originalReminderSettings.get("reminderFrequency"), + Boolean.parseBoolean(_originalReminderSettings.get("enablePublicationSearch"))); + } + } + + @Override + protected void doCleanup(boolean afterTest) throws TestTimeoutException + { + _userHelper.deleteUsers(false, SUBMITTER_1, SUBMITTER_2, ADMIN_USER); + super.doCleanup(afterTest); + } +}