Coverage for webapp/search/logic.py: 97%
65 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-27 22:07 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-27 22:07 +0000
1from flask_caching import Cache
4import requests
5from webapp.config import SEARCH_FIELDS
6from webapp.packages.logic import parse_package_for_card
7from webapp.observability.utils import trace_function
8from webapp.store_api import publisher_gateway
# Base URL of the discourse instance queried by search_discourse
# (requests go to "<DISCOURSE_URL>/search.json").
DISCOURSE_URL = "https://discourse.charmhub.io"
# Base URL of the documentation site queried by search_docs.
# NOTE: this value ends with a trailing slash; code that appends a path
# to it should account for that.
DOCS_URL = "https://canonical-juju.readthedocs-hosted.com/"

# Module-level cache (flask_caching "simple" backend) used to store
# fetched discourse search pages.
cache = Cache(config={"CACHE_TYPE": "simple"})
def _fetch_discourse_page(query: str, page: int) -> list:
    """
    Fetches a single page of discourse search results.

    Parameters:
        query (str): The search query sent to discourse.
        page (int): The page number of the search results to retrieve.

    Returns:
        list: The topics of the requested page. Each topic gets a "post"
        key holding the first post of the response whose "topic_id"
        matches the topic "id", or None if there is no such post.
    """
    # Passing the query through `params` lets requests URL-encode it, so
    # characters such as "&", "#" or spaces cannot corrupt the request.
    resp = requests.get(
        f"{DISCOURSE_URL}/search.json",
        params={"q": query, "page": page},
    )
    # Parse the body once instead of once per topic.
    data = resp.json()
    topics = data.get("topics") or []

    # Index posts by topic id; setdefault keeps the first post per topic.
    first_post_by_topic = {}
    for post in data.get("posts") or []:
        first_post_by_topic.setdefault(post["topic_id"], post)

    for topic in topics:
        topic["post"] = first_post_by_topic.get(topic["id"])

    return topics


@trace_function
def search_discourse(
    query: str,
    page: int = 1,
    see_all: bool = False,
) -> list:
    """
    Searches discourse for topics based on the query parameters.

    Parameters:
        query (str): The search query sent to discourse.
        page (int): The page number of the search results to retrieve.
            When see_all is True this is the first page to retrieve.
        see_all (bool, optional): If True, retrieves every page starting
            from `page`. If False (default), returns only the requested
            page.

    Returns:
        list: A list of topics, each with its matching post (or None)
        stored under the "post" key.

    Note:
        Fetched pages are cached for 300 seconds under the key
        "<query>-<page>", which reduces redundant requests to the
        discourse API.
    """
    if not see_all:
        cache_key = f"{query}-{page}"
        cached_page = cache.get(cache_key)
        if cached_page:
            return cached_page

        topics = _fetch_discourse_page(query, page)
        cache.set(cache_key, topics, timeout=300)
        return topics

    # Note: the discourse API does not return any indicator to show if
    # there are more pages to be fetched, so pages are requested until
    # one comes back empty or starts with the same topic as the previous
    # page (which is treated as a repeat of that page).
    # The API also does not support fetching multiple categories or
    # excluding a category from the search.
    result = []
    previous_first_id = None

    while True:
        cache_key = f"{query}-{page}"
        topics = cache.get(cache_key)
        from_cache = bool(topics)
        if not from_cache:
            topics = _fetch_discourse_page(query, page)

        if not topics or topics[0]["id"] == previous_first_id:
            break

        # Only cache pages that are actually part of the result set.
        if not from_cache:
            cache.set(cache_key, topics, timeout=300)

        result.extend(topics)
        previous_first_id = topics[0]["id"]
        page += 1

    return result
@trace_function
def search_docs(term: str) -> list:
    """
    Searches the documentation site for a specific search term, limited
    to the "canonical-juju" project.

    Parameters:
        term (str): The search term used to find relevant documentation.

    Returns:
        list: The entries under the "results" key of the search response,
        or an empty list if the response has no such key.
    """
    # DOCS_URL may end with a slash; strip it so the path is not joined
    # with a double slash.
    search_url = f"{DOCS_URL.rstrip('/')}/_/api/v3/search/"

    # Let requests encode the query so that special characters in `term`
    # cannot break the URL (":" becomes "%3A" and spaces become "+").
    resp = requests.get(
        search_url,
        params={"q": f"project:canonical-juju {term}"},
    )
    data = resp.json()

    return data.get("results", [])
@trace_function
def search_topics(term: str, page: int, see_all: bool = False) -> list:
    """
    Search discourse for a specific term and return the results.
    Topics belonging to the doc category are excluded.

    Parameters:
        term (str): The search term used to find relevant topics.
        page (int): The page number of the search results to retrieve.
        see_all (bool, optional): If True, retrieves all available search
            results. If False (default), returns the requested page only.

    Returns:
        list: The topics returned by search_discourse whose
        "category_id" is not the doc category.
    """
    # Discourse category id that is filtered out as the doc category.
    doc_category_id = 22

    topics = search_discourse(term, page, see_all)

    # .get keeps topics that carry no "category_id" instead of raising.
    return [
        topic
        for topic in topics
        if topic.get("category_id") != doc_category_id
    ]
@trace_function
def search_charms(term: str):
    """
    Looks up packages of type "charm" matching the term through the
    publisher gateway and returns each result parsed for card display.
    """
    response = publisher_gateway.find(
        term, type="charm", fields=SEARCH_FIELDS
    )

    cards = []
    for package in response["results"]:
        cards.append(parse_package_for_card(package))
    return cards
@trace_function
def search_bundles(term: str):
    """
    Looks up packages of type "bundle" matching the term through the
    publisher gateway and returns each result parsed for card display.
    """
    found = publisher_gateway.find(
        term, type="bundle", fields=SEARCH_FIELDS
    )
    packages = found["results"]
    return list(map(parse_package_for_card, packages))