Coverage for webapp/search/logic.py: 97%

1from flask_caching import Cache

4import requests

5from webapp.config import SEARCH_FIELDS

6from webapp.packages.logic import parse_package_for_card

7from webapp.observability.utils import trace_function

8from webapp.store_api import publisher_gateway

# Base URL of the discourse instance queried by search_discourse
# (no trailing slash; paths are appended with a leading "/").
DISCOURSE_URL = "https://discourse.charmhub.io"

# Base URL of the hosted Juju documentation queried by search_docs.
# NOTE: unlike DISCOURSE_URL, this value ends with a trailing slash.
DOCS_URL = "https://canonical-juju.readthedocs-hosted.com/"

# Cache used by search_discourse to store fetched pages of topics, keyed by
# "<query>-<page>". Configured with Flask-Caching's "simple" backend.
cache = Cache(config={"CACHE_TYPE": "simple"})

def _attach_first_posts(topics: list, posts: list) -> list:
    """
    Sets ``topic["post"]`` on every topic to the first post in ``posts``
    whose ``topic_id`` equals the topic's ``id`` (``None`` if there is none).

    The topics are modified in place and the same list is returned.
    """
    if not topics:
        return topics

    first_post_by_topic = {}
    for post in posts:
        # setdefault keeps the earliest post seen for each topic, which is
        # the same post a first-match scan over ``posts`` would pick.
        first_post_by_topic.setdefault(post["topic_id"], post)

    for topic in topics:
        topic["post"] = first_post_by_topic.get(topic["id"])

    return topics


def _fetch_discourse_page(query: str, page: int) -> list:
    """
    Fetches one page of discourse search results and returns its topics,
    each with its first matching post attached under the "post" key.
    """
    # NOTE(review): ``query`` is interpolated into the URL without escaping,
    # so characters such as "&" or "#" change the request. Confirm whether
    # callers already URL-encode the term before adding encoding here.
    resp = requests.get(
        f"{DISCOURSE_URL}/search.json?q={query}&page={page}"
    )
    # Parse the body once; a response without "topics" or "posts" is
    # treated as having none.
    data = resp.json()
    return _attach_first_posts(data.get("topics", []), data.get("posts", []))


@trace_function
def search_discourse(
    query: str,
    page: int = 1,
    see_all: bool = False,
) -> list:
    """
    Searches discourse for topics based on the query parameters.

    Parameters:
        query (str): The search term used to find relevant topics.
        page (int): The page number of the search results to retrieve.
            When see_all is True this is the page to start from.
        see_all (bool, optional): If True, keeps fetching consecutive pages
            starting at ``page`` until no further results are found. If
            False (default), returns only the requested page.

    Returns:
        list: A list of topics. Each topic has a "post" key holding the
        first post of the response that belongs to it, or None.

    Note:
        Fetched pages are cached for 300 seconds under the key
        "<query>-<page>", which reduces redundant requests to the
        discourse API.
    """
    if not see_all:
        cached_page = cache.get(f"{query}-{page}")
        if cached_page:
            return cached_page

        topics = _fetch_discourse_page(query, page)
        cache.set(f"{query}-{page}", topics, timeout=300)
        return topics

    # Note: this logic is currently slower than it should ordinarily
    # be because the discourse API currently has some limitations that
    # would probably be fixed in the near future.
    # The ones affecting this code are:
    # 1. The API does not return any indicator to show if there are more
    #    pages to be fetched.
    # 2. The API does not support fetching multiple categories or
    #    excluding a category from the search
    result = []

    while True:
        cached_page = cache.get(f"{query}-{page}")
        if cached_page:
            result.extend(cached_page)
            page += 1
            continue

        topics = _fetch_discourse_page(query, page)
        if not topics:
            # An empty page means there is nothing left to fetch.
            break

        cache.set(f"{query}-{page}", topics, timeout=300)
        result.extend(topics)
        page += 1

        # Look ahead at the next page to decide whether to continue: stop
        # when it is empty or starts with the same topic as the page just
        # fetched. The look-ahead response is not cached, so that page is
        # requested again on the next iteration.
        next_resp = requests.get(
            f"{DISCOURSE_URL}/search.json?q={query}&page={page}"
        )
        next_topics = next_resp.json().get("topics", [])
        if not next_topics or next_topics[0]["id"] == topics[0]["id"]:
            break

    return result

112

113

@trace_function
def search_docs(term: str) -> list:
    """
    Searches the hosted Juju documentation (DOCS_URL) for a search term,
    restricted to the "canonical-juju" project.

    Parameters:
        term (str): The search term used to find relevant documentation.

    Returns:
        list: The "results" entry of the search API response, or an empty
        list when the response has no such entry.
    """
    # DOCS_URL ends with a slash; strip it so the request path is
    # "/_/api/v3/search/" and not "//_/api/v3/search/".
    base_url = DOCS_URL.rstrip("/")

    # NOTE(review): ``term`` is interpolated into the URL without escaping.
    # Confirm whether callers already URL-encode it.
    search_url = (
        f"{base_url}/_/api/v3/search/?q=project%3Acanonical-juju+{term}"
    )

    resp = requests.get(search_url)
    return resp.json().get("results", [])

141

142

@trace_function
def search_topics(term: str, page: int, see_all: bool = False) -> list:
    """
    Search discourse for a specific term and return the results.
    It searches from all categories except the doc category.

    Parameters:
        term (str): The search term used to find relevant topics.
        page (int): The page number of the search results to retrieve.
        see_all (bool, optional): If True, retrieves all available search
            results. If False (default), returns the requested page only.

    Returns:
        list: The topics returned by search_discourse whose "category_id"
        is not the doc category.
    """
    # Category id excluded from topic results (the doc category, per this
    # function's contract).
    doc_category_id = 22

    topics = search_discourse(term, page, see_all)

    return [
        topic for topic in topics if topic["category_id"] != doc_category_id
    ]

165

166

@trace_function
def search_charms(term: str):
    """
    Looks up packages of type "charm" matching ``term`` through the
    publisher gateway and returns each result passed through
    parse_package_for_card.
    """
    response = publisher_gateway.find(
        term, type="charm", fields=SEARCH_FIELDS
    )
    packages = response["results"]
    return list(map(parse_package_for_card, packages))

175

176

@trace_function
def search_bundles(term: str):
    """
    Looks up packages of type "bundle" matching ``term`` through the
    publisher gateway and returns each result passed through
    parse_package_for_card.
    """
    response = publisher_gateway.find(
        term, type="bundle", fields=SEARCH_FIELDS
    )
    packages = response["results"]
    return list(map(parse_package_for_card, packages))