diff --git a/ai/discover/build.gradle.kts b/ai/discover/build.gradle.kts index 42286e6e..a10c838c 100644 --- a/ai/discover/build.gradle.kts +++ b/ai/discover/build.gradle.kts @@ -20,4 +20,5 @@ dependencies { testImplementation(libs.kotlin.test) testImplementation(libs.kotlinx.coroutines.test) + testImplementation(libs.ktor.client.mock) } diff --git a/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiConfig.kt b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiConfig.kt index 2da6ed63..9a7a8a78 100644 --- a/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiConfig.kt +++ b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiConfig.kt @@ -38,12 +38,16 @@ data class LlmConfig( /** * Search provider configuration. * - * @param provider search API provider type - * @param apiKey search API key if needed + * @param provider search API provider type: `"bing"`, `"google"`, or + * `"llm"` (default, no-op fallback) + * @param apiKey search API key (Bing subscription key or Google API key) + * @param cx Google Custom Search Engine ID (only used when + * [provider] is `"google"`) */ data class SearchConfig( val provider: String = "llm", val apiKey: String = "", + val cx: String = "", ) /** diff --git a/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiModule.kt b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiModule.kt index dbb290d1..6704de80 100644 --- a/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiModule.kt +++ b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/AiModule.kt @@ -4,12 +4,17 @@ import com.linroid.ketch.ai.agent.DiscoveryStepListener import com.linroid.ketch.ai.fetch.ContentExtractor import com.linroid.ketch.ai.fetch.SafeFetcher import com.linroid.ketch.ai.fetch.UrlValidator +import com.linroid.ketch.ai.search.BingSearchProvider import com.linroid.ketch.ai.search.DummySearchProvider +import com.linroid.ketch.ai.search.GoogleSearchProvider import com.linroid.ketch.ai.search.SearchProvider import com.linroid.ketch.ai.site.SiteProfileStore import 
com.linroid.ketch.ai.site.SiteProfiler import io.ktor.client.HttpClient import io.ktor.client.plugins.HttpTimeout +import io.ktor.client.plugins.contentnegotiation.ContentNegotiation +import io.ktor.serialization.kotlinx.json.json +import kotlinx.serialization.json.Json /** * Pre-built AI module components ready for integration. @@ -57,7 +62,10 @@ class AiModule( val siteProfiler = SiteProfiler(fetcher) val resolvedSearchProvider = - searchProvider ?: DummySearchProvider() + searchProvider ?: resolveSearchProvider( + config.search, + createSearchClient(), + ) val discoveryService = ResourceDiscoveryService( searchProvider = resolvedSearchProvider, @@ -74,5 +82,24 @@ class AiModule( siteProfileStore = siteProfileStore, ) } + + private fun createSearchClient(): HttpClient = HttpClient { + install(ContentNegotiation) { + json(Json { ignoreUnknownKeys = true }) + } + } + + internal fun resolveSearchProvider( + config: SearchConfig, + httpClient: HttpClient, + ): SearchProvider = when (config.provider.lowercase()) { + "bing" -> BingSearchProvider(httpClient, config.apiKey) + "google" -> GoogleSearchProvider( + httpClient, + config.apiKey, + config.cx, + ) + else -> DummySearchProvider() + } } } diff --git a/ai/discover/src/main/kotlin/com/linroid/ketch/ai/search/BingSearchProvider.kt b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/search/BingSearchProvider.kt new file mode 100644 index 00000000..ad6e66bb --- /dev/null +++ b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/search/BingSearchProvider.kt @@ -0,0 +1,82 @@ +package com.linroid.ketch.ai.search + +import io.ktor.client.HttpClient +import io.ktor.client.call.body +import io.ktor.client.request.get +import io.ktor.client.request.header +import io.ktor.client.request.parameter +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** + * Search provider backed by the Bing Web Search API v7. 
+ *
+ * @param httpClient Ktor client with `ContentNegotiation` + JSON installed
+ * @param apiKey Bing Search API subscription key
+ * @param market market code for result localization (e.g. `"en-US"`)
+ */
+internal class BingSearchProvider(
+  private val httpClient: HttpClient,
+  private val apiKey: String,
+  private val market: String = "en-US",
+) : SearchProvider {
+
+  /**
+   * Queries Bing for [query], optionally restricted to [sites], and maps each
+   * returned web page to a [SearchResult]. [maxResults] is clamped to 1..50
+   * before being sent as `count`. Any failure other than coroutine
+   * cancellation yields an empty list.
+   */
+  override suspend fun search(
+    query: String,
+    sites: List<String>,
+    maxResults: Int,
+  ): List<SearchResult> = try {
+    val fullQuery = buildQuery(query, sites)
+    val response: BingSearchResponse = httpClient.get(API_URL) {
+      header("Ocp-Apim-Subscription-Key", apiKey)
+      parameter("q", fullQuery)
+      parameter("count", maxResults.coerceIn(1, 50))
+      parameter("mkt", market)
+    }.body()
+    response.webPages?.value?.map { page ->
+      SearchResult(url = page.url, title = page.name, snippet = page.snippet)
+    } ?: emptyList()
+  } catch (e: kotlin.coroutines.cancellation.CancellationException) {
+    throw e // cancellation must propagate, not turn into "no results"
+  } catch (_: Exception) {
+    emptyList() // best-effort: any other failure degrades to no results
+  }
+
+  companion object {
+    private const val API_URL = "https://api.bing.microsoft.com/v7.0/search"
+
+    /**
+     * Builds a Bing search query with optional `site:` operators.
+     */
+    internal fun buildQuery(query: String, sites: List<String>): String {
+      if (sites.isEmpty()) return query
+      val siteOps = sites.joinToString(" OR ") { "site:$it" }
+      return "$query ($siteOps)"
+    }
+  }
+}
+
+@Serializable
+internal data class BingSearchResponse(
+  val webPages: BingWebPages? = null,
+)
+
+@Serializable
+internal data class BingWebPages(
+  val value: List<BingWebPage> = emptyList(),
+)
+
+@Serializable
+internal data class BingWebPage(
+  val name: String,
+  val url: String,
+  val snippet: String = "",
+  @SerialName("dateLastCrawled")
+  val dateLastCrawled: String?
= null,
+)
diff --git a/ai/discover/src/main/kotlin/com/linroid/ketch/ai/search/GoogleSearchProvider.kt b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/search/GoogleSearchProvider.kt
new file mode 100644
index 00000000..90fb7d57
--- /dev/null
+++ b/ai/discover/src/main/kotlin/com/linroid/ketch/ai/search/GoogleSearchProvider.kt
@@ -0,0 +1,98 @@
+package com.linroid.ketch.ai.search
+
+import io.ktor.client.HttpClient
+import io.ktor.client.call.body
+import io.ktor.client.request.get
+import io.ktor.client.request.parameter
+import kotlinx.serialization.Serializable
+import kotlin.coroutines.cancellation.CancellationException
+
+/**
+ * Search provider backed by the Google Custom Search JSON API.
+ *
+ * @param httpClient Ktor client with `ContentNegotiation` + JSON installed
+ * @param apiKey Google API key
+ * @param cx Custom Search Engine ID
+ */
+internal class GoogleSearchProvider(
+  private val httpClient: HttpClient,
+  private val apiKey: String,
+  private val cx: String,
+) : SearchProvider {
+
+  /**
+   * Runs [query] against the Custom Search API and maps each item to a
+   * [SearchResult].
+   *
+   * A single entry in [sites] is sent through the `siteSearch` parameter;
+   * several entries are folded into the query as `site:` operators.
+   * [maxResults] is clamped to 1..10 before being sent as `num`.
+   *
+   * @return the mapped results, or an empty list when the response has no
+   *   `items` or the request/decoding throws
+   * @throws CancellationException when the calling coroutine is cancelled
+   */
+  override suspend fun search(
+    query: String,
+    sites: List<String>,
+    maxResults: Int,
+  ): List<SearchResult> = try {
+    val num = maxResults.coerceIn(1, 10)
+    val response: GoogleSearchResponse = httpClient.get(API_URL) {
+      parameter("key", apiKey)
+      parameter("cx", cx)
+      parameter("num", num)
+      if (sites.size == 1) {
+        parameter("q", query)
+        parameter("siteSearch", sites.single())
+      } else {
+        parameter("q", buildMultiSiteQuery(query, sites))
+      }
+    }.body()
+    response.items?.map { item ->
+      SearchResult(
+        url = item.link,
+        title = item.title,
+        snippet = item.snippet ?: "",
+      )
+    } ?: emptyList()
+  } catch (e: CancellationException) {
+    // Cancellation must propagate instead of being reported as "no results".
+    throw e
+  } catch (_: Exception) {
+    // Best-effort lookup: any other failure degrades to an empty result.
+    emptyList()
+  }
+
+  companion object {
+    private const val API_URL =
+      "https://www.googleapis.com/customsearch/v1"
+
+    /**
+     * Builds a query with `site:` operators for multiple domains.
+     * For zero or one site, returns [query] unchanged.
+     */
+    internal fun buildMultiSiteQuery(
+      query: String,
+      sites: List<String>,
+    ): String {
+      if (sites.size <= 1) return query
+      val siteOps = sites.joinToString(" OR ") { "site:$it" }
+      return "$query ($siteOps)"
+    }
+  }
+}
+
+/** Response envelope of the Custom Search API; only `items` is decoded. */
+@Serializable
+internal data class GoogleSearchResponse(
+  val items: List<GoogleSearchItem>? = null,
+)
+
+/** One search hit; `snippet` may be missing from the response. */
+@Serializable
+internal data class GoogleSearchItem(
+  val title: String,
+  val link: String,
+  val snippet: String? = null,
+)
diff --git a/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/BingSearchProviderIntegrationTest.kt b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/BingSearchProviderIntegrationTest.kt
new file mode 100644
index 00000000..80ef36c9
--- /dev/null
+++ b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/BingSearchProviderIntegrationTest.kt
@@ -0,0 +1,95 @@
+package com.linroid.ketch.ai.search
+
+import io.ktor.client.HttpClient
+import io.ktor.client.engine.mock.MockEngine
+import io.ktor.client.engine.mock.respond
+import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
+import io.ktor.http.ContentType
+import io.ktor.http.HttpHeaders
+import io.ktor.http.HttpStatusCode
+import io.ktor.http.headersOf
+import io.ktor.serialization.kotlinx.json.json
+import kotlinx.coroutines.test.runTest
+import kotlinx.serialization.json.Json
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertTrue
+
+class BingSearchProviderIntegrationTest {
+
+  private val jsonHeaders = headersOf(
+    HttpHeaders.ContentType,
+    ContentType.Application.Json.toString(),
+  )
+
+  private fun createClient(
+    respondJson: String,
+    status: HttpStatusCode = HttpStatusCode.OK,
+  ): HttpClient = HttpClient(MockEngine) {
+    engine {
+      addHandler {
+        respond(respondJson, status, jsonHeaders)
+      }
+    }
+    install(ContentNegotiation) {
+      json(Json { ignoreUnknownKeys = true })
+    }
+  }
+
+  @Test
+  fun search_parsesResults() = runTest {
+    val json = """
+      {
+        "webPages": {
+          "value": [
+            {
+              "name": "Ubuntu Downloads",
"url": "https://ubuntu.com/download", + "snippet": "Download Ubuntu Desktop" + }, + { + "name": "Ubuntu Releases", + "url": "https://releases.ubuntu.com/", + "snippet": "All Ubuntu releases" + } + ] + } + } + """.trimIndent() + + val provider = BingSearchProvider(createClient(json), "test-key") + val results = provider.search("ubuntu iso") + + assertEquals(2, results.size) + assertEquals("Ubuntu Downloads", results[0].title) + assertEquals("https://ubuntu.com/download", results[0].url) + assertEquals("Download Ubuntu Desktop", results[0].snippet) + assertEquals("Ubuntu Releases", results[1].title) + } + + @Test + fun search_emptyWebPages_returnsEmptyList() = runTest { + val json = """{ "webPages": { "value": [] } }""" + val provider = BingSearchProvider(createClient(json), "test-key") + val results = provider.search("nothing") + assertTrue(results.isEmpty()) + } + + @Test + fun search_noWebPagesField_returnsEmptyList() = runTest { + val json = """{ "_type": "SearchResponse" }""" + val provider = BingSearchProvider(createClient(json), "test-key") + val results = provider.search("nothing") + assertTrue(results.isEmpty()) + } + + @Test + fun search_httpError_returnsEmptyList() = runTest { + val provider = BingSearchProvider( + createClient("{}", HttpStatusCode.Unauthorized), + "bad-key", + ) + val results = provider.search("test") + assertTrue(results.isEmpty()) + } +} diff --git a/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/BingSearchProviderTest.kt b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/BingSearchProviderTest.kt new file mode 100644 index 00000000..7caeb531 --- /dev/null +++ b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/BingSearchProviderTest.kt @@ -0,0 +1,34 @@ +package com.linroid.ketch.ai.search + +import kotlin.test.Test +import kotlin.test.assertEquals + +class BingSearchProviderTest { + + @Test + fun buildQuery_noSites_returnsQueryUnchanged() { + val result = BingSearchProvider.buildQuery("ubuntu iso", emptyList()) + 
assertEquals("ubuntu iso", result) + } + + @Test + fun buildQuery_singleSite_appendsSiteOperator() { + val result = BingSearchProvider.buildQuery( + "ubuntu iso", + listOf("ubuntu.com"), + ) + assertEquals("ubuntu iso (site:ubuntu.com)", result) + } + + @Test + fun buildQuery_multipleSites_joinsWithOR() { + val result = BingSearchProvider.buildQuery( + "download", + listOf("example.com", "test.org", "foo.net"), + ) + assertEquals( + "download (site:example.com OR site:test.org OR site:foo.net)", + result, + ) + } +} diff --git a/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/GoogleSearchProviderIntegrationTest.kt b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/GoogleSearchProviderIntegrationTest.kt new file mode 100644 index 00000000..cf040193 --- /dev/null +++ b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/GoogleSearchProviderIntegrationTest.kt @@ -0,0 +1,130 @@ +package com.linroid.ketch.ai.search + +import io.ktor.client.HttpClient +import io.ktor.client.engine.mock.MockEngine +import io.ktor.client.engine.mock.respond +import io.ktor.client.plugins.contentnegotiation.ContentNegotiation +import io.ktor.http.ContentType +import io.ktor.http.HttpHeaders +import io.ktor.http.HttpStatusCode +import io.ktor.http.headersOf +import io.ktor.serialization.kotlinx.json.json +import kotlinx.coroutines.test.runTest +import kotlinx.serialization.json.Json +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertTrue + +class GoogleSearchProviderIntegrationTest { + + private val jsonHeaders = headersOf( + HttpHeaders.ContentType, + ContentType.Application.Json.toString(), + ) + + private fun createClient( + respondJson: String, + status: HttpStatusCode = HttpStatusCode.OK, + ): HttpClient = HttpClient(MockEngine) { + engine { + addHandler { + respond(respondJson, status, jsonHeaders) + } + } + install(ContentNegotiation) { + json(Json { ignoreUnknownKeys = true }) + } + } + + @Test + fun search_parsesResults() = runTest 
{ + val json = """ + { + "items": [ + { + "title": "FFmpeg Download", + "link": "https://ffmpeg.org/download.html", + "snippet": "Download FFmpeg builds" + }, + { + "title": "FFmpeg Releases", + "link": "https://ffmpeg.org/releases/", + "snippet": "Source releases" + } + ] + } + """.trimIndent() + + val provider = GoogleSearchProvider( + createClient(json), + "test-key", + "test-cx", + ) + val results = provider.search("ffmpeg download") + + assertEquals(2, results.size) + assertEquals("FFmpeg Download", results[0].title) + assertEquals("https://ffmpeg.org/download.html", results[0].url) + assertEquals("Download FFmpeg builds", results[0].snippet) + assertEquals("FFmpeg Releases", results[1].title) + } + + @Test + fun search_noItems_returnsEmptyList() = runTest { + val json = """{ "searchInformation": { "totalResults": "0" } }""" + val provider = GoogleSearchProvider( + createClient(json), + "test-key", + "test-cx", + ) + val results = provider.search("nothing") + assertTrue(results.isEmpty()) + } + + @Test + fun search_emptyItems_returnsEmptyList() = runTest { + val json = """{ "items": [] }""" + val provider = GoogleSearchProvider( + createClient(json), + "test-key", + "test-cx", + ) + val results = provider.search("nothing") + assertTrue(results.isEmpty()) + } + + @Test + fun search_httpError_returnsEmptyList() = runTest { + val provider = GoogleSearchProvider( + createClient("{}", HttpStatusCode.Forbidden), + "bad-key", + "test-cx", + ) + val results = provider.search("test") + assertTrue(results.isEmpty()) + } + + @Test + fun search_nullSnippet_defaultsToEmpty() = runTest { + val json = """ + { + "items": [ + { + "title": "No Snippet Page", + "link": "https://example.com" + } + ] + } + """.trimIndent() + + val provider = GoogleSearchProvider( + createClient(json), + "test-key", + "test-cx", + ) + val results = provider.search("test") + + assertEquals(1, results.size) + assertEquals("", results[0].snippet) + } +} diff --git 
a/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/GoogleSearchProviderTest.kt b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/GoogleSearchProviderTest.kt new file mode 100644 index 00000000..05aa94de --- /dev/null +++ b/ai/discover/src/test/kotlin/com/linroid/ketch/ai/search/GoogleSearchProviderTest.kt @@ -0,0 +1,37 @@ +package com.linroid.ketch.ai.search + +import kotlin.test.Test +import kotlin.test.assertEquals + +class GoogleSearchProviderTest { + + @Test + fun buildMultiSiteQuery_noSites_returnsQueryUnchanged() { + val result = GoogleSearchProvider.buildMultiSiteQuery( + "ffmpeg download", + emptyList(), + ) + assertEquals("ffmpeg download", result) + } + + @Test + fun buildMultiSiteQuery_singleSite_returnsQueryUnchanged() { + val result = GoogleSearchProvider.buildMultiSiteQuery( + "ffmpeg download", + listOf("ffmpeg.org"), + ) + assertEquals("ffmpeg download", result) + } + + @Test + fun buildMultiSiteQuery_multipleSites_joinsWithOR() { + val result = GoogleSearchProvider.buildMultiSiteQuery( + "download", + listOf("example.com", "test.org"), + ) + assertEquals( + "download (site:example.com OR site:test.org)", + result, + ) + } +} diff --git a/app/android/src/main/kotlin/com/linroid/ketch/app/android/KetchService.kt b/app/android/src/main/kotlin/com/linroid/ketch/app/android/KetchService.kt index 74ef46c8..de1d514f 100644 --- a/app/android/src/main/kotlin/com/linroid/ketch/app/android/KetchService.kt +++ b/app/android/src/main/kotlin/com/linroid/ketch/app/android/KetchService.kt @@ -15,6 +15,7 @@ import androidx.core.app.ServiceCompat import com.linroid.ketch.ai.AiConfig import com.linroid.ketch.ai.AiModule import com.linroid.ketch.ai.LlmConfig +import com.linroid.ketch.ai.SearchConfig import com.linroid.ketch.api.log.KetchLogger import com.linroid.ketch.app.instance.InstanceFactory import com.linroid.ketch.app.instance.InstanceManager @@ -123,6 +124,7 @@ class KetchService : Service() { AiConfig( enabled = true, llm = LlmConfig(apiKey = 
apiKey), + search = resolveSearchConfig(), ), ) aiProvider = EmbeddedAiDiscoveryProvider( @@ -258,6 +260,24 @@ class KetchService : Service() { companion object { private const val CHANNEL_ID = "ketch_service" private const val NOTIFICATION_ID = 1 - private const val ACTION_REPOST_NOTIFICATION = "com.linroid.ketch.app.android.action.REPOST_NOTIFICATION" + private const val ACTION_REPOST_NOTIFICATION = + "com.linroid.ketch.app.android.action.REPOST_NOTIFICATION" } } + +private fun resolveSearchConfig(): SearchConfig { + val bingKey = System.getenv("BING_SEARCH_API_KEY") + if (!bingKey.isNullOrBlank()) { + return SearchConfig(provider = "bing", apiKey = bingKey) + } + val googleKey = System.getenv("GOOGLE_SEARCH_API_KEY") + val googleCx = System.getenv("GOOGLE_SEARCH_CX") + if (!googleKey.isNullOrBlank() && !googleCx.isNullOrBlank()) { + return SearchConfig( + provider = "google", + apiKey = googleKey, + cx = googleCx, + ) + } + return SearchConfig() +} diff --git a/app/desktop/src/main/kotlin/com/linroid/ketch/app/desktop/main.kt b/app/desktop/src/main/kotlin/com/linroid/ketch/app/desktop/main.kt index c2690f44..2cfc5f3f 100644 --- a/app/desktop/src/main/kotlin/com/linroid/ketch/app/desktop/main.kt +++ b/app/desktop/src/main/kotlin/com/linroid/ketch/app/desktop/main.kt @@ -8,6 +8,7 @@ import androidx.compose.ui.window.application import com.linroid.ketch.ai.AiConfig import com.linroid.ketch.ai.AiModule import com.linroid.ketch.ai.LlmConfig +import com.linroid.ketch.ai.SearchConfig import com.linroid.ketch.app.App import com.linroid.ketch.app.instance.InstanceFactory import com.linroid.ketch.app.instance.InstanceManager @@ -21,6 +22,23 @@ import com.linroid.ketch.sqlite.createSqliteTaskStore import java.io.File import java.net.InetAddress +private fun resolveSearchConfig(): SearchConfig { + val bingKey = System.getenv("BING_SEARCH_API_KEY") + if (!bingKey.isNullOrBlank()) { + return SearchConfig(provider = "bing", apiKey = bingKey) + } + val googleKey = 
System.getenv("GOOGLE_SEARCH_API_KEY") + val googleCx = System.getenv("GOOGLE_SEARCH_CX") + if (!googleKey.isNullOrBlank() && !googleCx.isNullOrBlank()) { + return SearchConfig( + provider = "google", + apiKey = googleKey, + cx = googleCx, + ) + } + return SearchConfig() +} + fun main() = application { val instanceManager = remember { val configDir = defaultConfigDir() @@ -74,6 +92,7 @@ fun main() = application { AiConfig( enabled = true, llm = LlmConfig(apiKey = apiKey), + search = resolveSearchConfig(), ), ) EmbeddedAiDiscoveryProvider(aiModule.discoveryService) diff --git a/cli/src/main/kotlin/com/linroid/ketch/cli/Main.kt b/cli/src/main/kotlin/com/linroid/ketch/cli/Main.kt index 69636583..637a9fdb 100644 --- a/cli/src/main/kotlin/com/linroid/ketch/cli/Main.kt +++ b/cli/src/main/kotlin/com/linroid/ketch/cli/Main.kt @@ -4,6 +4,7 @@ import ch.qos.logback.classic.Level import com.linroid.ketch.ai.AiConfig import com.linroid.ketch.ai.AiModule import com.linroid.ketch.ai.LlmConfig +import com.linroid.ketch.ai.SearchConfig import com.linroid.ketch.api.Destination import com.linroid.ketch.api.DownloadPriority import com.linroid.ketch.api.DownloadRequest @@ -524,6 +525,7 @@ private fun runAiDiscover(args: List) { val aiConfig = AiConfig( enabled = true, llm = LlmConfig(apiKey = apiKey), + search = resolveSearchConfig(), ) val aiModule = AiModule.create(aiConfig) @@ -606,6 +608,11 @@ private fun printUsage() { println(" --max-results Max results (default: 5)") println(" Requires OPENAI_API_KEY env var") println() + println(" Search env vars (checked in order):") + println(" BING_SEARCH_API_KEY Use Bing Web Search API") + println(" GOOGLE_SEARCH_API_KEY Use Google Custom Search") + println(" GOOGLE_SEARCH_CX Google Search Engine ID") + println() println("Examples:") println(" ketch https://example.com/file.zip") println(" ketch -v https://example.com/file.zip") @@ -646,6 +653,28 @@ private fun printServerUsage() { println(" ketch server --generate-config") } +/** + * 
Resolves [SearchConfig] from environment variables. + * + * Priority: `BING_SEARCH_API_KEY` > `GOOGLE_SEARCH_API_KEY` + `GOOGLE_SEARCH_CX` > default. + */ +private fun resolveSearchConfig(): SearchConfig { + val bingKey = System.getenv("BING_SEARCH_API_KEY") + if (!bingKey.isNullOrBlank()) { + return SearchConfig(provider = "bing", apiKey = bingKey) + } + val googleKey = System.getenv("GOOGLE_SEARCH_API_KEY") + val googleCx = System.getenv("GOOGLE_SEARCH_CX") + if (!googleKey.isNullOrBlank() && !googleCx.isNullOrBlank()) { + return SearchConfig( + provider = "google", + apiKey = googleKey, + cx = googleCx, + ) + } + return SearchConfig() +} + private fun parsePriority(value: String): DownloadPriority? { return when (value.trim().lowercase()) { "low" -> DownloadPriority.LOW