Coverage for webapp/search/logic.py: 97%

65 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-27 22:07 +0000

1from flask_caching import Cache 

2 

3 

4import requests 

5from webapp.config import SEARCH_FIELDS 

6from webapp.packages.logic import parse_package_for_card 

7from webapp.observability.utils import trace_function 

8from webapp.store_api import publisher_gateway 

9 

# Base URL of the discourse instance; search_discourse() appends
# "/search.json" to it.
10DISCOURSE_URL = "https://discourse.charmhub.io" 

# Base URL of the hosted Juju documentation, used by search_docs().
# NOTE(review): this value ends with "/", while search_docs() appends a
# path that starts with "/" - confirm the resulting "//" is intended.
11DOCS_URL = "https://canonical-juju.readthedocs-hosted.com/" 

12 

# Cache used by search_discourse() to keep fetched result pages, keyed by
# "<query>-<page>", for 300 seconds.
# NOTE(review): "simple" is presumably flask-caching's in-process cache, so
# entries would not be shared between worker processes - confirm.
13cache = Cache(config={"CACHE_TYPE": "simple"}) 

14 

15 

def _fetch_discourse_topics(query: str, page: int) -> list:
    """
    Fetches one page of discourse search results.

    Every returned topic gets a "post" key holding the first post of the
    response whose "topic_id" matches the topic's "id", or None when the
    response contains no such post.

    Parameters:
        query (str): The search query sent to discourse.
        page (int): The page number of the search results to retrieve.

    Returns:
        list: The topics of the requested page (empty if there are none).
    """
    # The query goes through `params` so that requests URL-encodes it;
    # interpolating it into the URL breaks on "&", "#", spaces, etc.
    # The timeout keeps a stalled discourse server from blocking forever.
    resp = requests.get(
        f"{DISCOURSE_URL}/search.json",
        params={"q": query, "page": page},
        timeout=10,
    )
    data = resp.json()
    topics = data.get("topics", [])

    # Index the posts once (first post per topic wins) instead of
    # rescanning the whole post list for every topic. A response without
    # a "posts" key simply leaves every topic with post=None.
    first_post_by_topic = {}
    for post in data.get("posts", []):
        first_post_by_topic.setdefault(post["topic_id"], post)

    for topic in topics:
        topic["post"] = first_post_by_topic.get(topic["id"])

    return topics


@trace_function
def search_discourse(
    query: str,
    page: int = 1,
    see_all: bool = False,
) -> list:
    """
    Searches discourse for topics based on the query parameters.

    Parameters:
        query (str): The search query used to find relevant topics.
        page (int): The page number of the search results to retrieve.
            When see_all is True this is the page to start from.
        see_all (bool, optional): If True, keeps fetching consecutive
            pages, starting at `page`, until discourse has no more
            results. If False (default), returns only the requested page.

    Returns:
        list: A list of topics, each carrying its matching post (or None)
        under the "post" key.

    Note:
        Fetched pages are cached for 300 seconds under the key
        "<query>-<page>"; this reduces redundant requests to the
        discourse API.
    """
    if not see_all:
        cache_key = f"{query}-{page}"
        cached_page = cache.get(cache_key)
        if cached_page:
            return cached_page

        topics = _fetch_discourse_topics(query, page)
        cache.set(cache_key, topics, timeout=300)
        return topics

    # Note: the discourse API does not return any indicator to show if
    # there are more pages to be fetched, and it does not support fetching
    # multiple categories or excluding a category from the search.
    # The end of the results is therefore detected by fetching pages until
    # one comes back empty or starts with the same topic as the page
    # before it.
    result = []
    previous_first_id = None

    while True:
        cache_key = f"{query}-{page}"
        topics = cache.get(cache_key)

        if not topics:
            topics = _fetch_discourse_topics(query, page)
            if not topics or topics[0]["id"] == previous_first_id:
                break
            cache.set(cache_key, topics, timeout=300)

        result.extend(topics)
        previous_first_id = topics[0]["id"]
        page += 1

    return result

112 

113 

@trace_function
def search_docs(term: str) -> list:
    """
    Searches the hosted Juju documentation for a specific search term.

    The search is restricted to the "canonical-juju" project.

    Parameters:
        term (str): The search term used to find relevant documentation.

    Returns:
        list: The entries found under the "results" key of the search API
        response, or an empty list when the response has no such key.
    """
    # DOCS_URL may end with a slash; strip it so the request path does
    # not start with "//".
    search_url = f"{DOCS_URL.rstrip('/')}/_/api/v3/search/"

    # Passing the query through `params` lets requests URL-encode it
    # (":" -> "%3A", " " -> "+"), so terms containing "&", "#" or spaces
    # are sent intact. The timeout keeps a stalled server from blocking
    # the caller forever.
    resp = requests.get(
        search_url,
        params={"q": f"project:canonical-juju {term}"},
        timeout=10,
    )

    return resp.json().get("results", [])

141 

142 

@trace_function
def search_topics(term: str, page: int, see_all: bool = False) -> list:
    """
    Searches discourse for a specific term and returns the results.

    Topics that belong to the doc category are left out of the results.

    Parameters:
        term (str): The search term used to find relevant topics.
        page (int): The page number of the search results to retrieve.
        see_all (bool, optional): If True, retrieves all available search
            results. If False (default), returns the requested page only.

    Returns:
        list: The topics returned by search_discourse() whose
        "category_id" is not the excluded category.
    """
    # Id of the discourse category that is filtered out.
    # NOTE(review): presumably the doc category mentioned above - confirm
    # against the discourse instance.
    excluded_category_id = 22

    topics = search_discourse(term, page, see_all)

    return [
        topic
        for topic in topics
        if topic["category_id"] != excluded_category_id
    ]

165 

166 

@trace_function
def search_charms(term: str):
    """
    Searches for charms matching the term through the publisher gateway.

    Parameters:
        term (str): The search term forwarded to publisher_gateway.find().

    Returns:
        list: One parse_package_for_card() result per entry under the
        "results" key of the gateway response.
    """
    found = publisher_gateway.find(term, type="charm", fields=SEARCH_FIELDS)

    cards = []
    for entry in found["results"]:
        cards.append(parse_package_for_card(entry))

    return cards

175 

176 

@trace_function
def search_bundles(term: str):
    """
    Searches for bundles matching the term through the publisher gateway.

    Parameters:
        term (str): The search term forwarded to publisher_gateway.find().

    Returns:
        list: One parse_package_for_card() result per entry under the
        "results" key of the gateway response.
    """
    found = publisher_gateway.find(term, type="bundle", fields=SEARCH_FIELDS)

    cards = []
    for entry in found["results"]:
        cards.append(parse_package_for_card(entry))

    return cards