Coverage for apps/ptf/solr/search_helpers.py: 93%

410 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-19 19:20 +0000

1import datetime 

2import re 

3import string 

4from urllib.parse import quote 

5from urllib.parse import quote_plus 

6from urllib.parse import urlparse 

7 

8from pysolr import Results 

9 

10from django.conf import settings 

11from django.http import Http404 

12from django.http import QueryDict 

13from django.urls import reverse 

14from django.utils import translation 

15 

16from ptf.display import resolver 

17from ptf.site_register import SITE_REGISTER 

18 

19 

20###################################################################### 

21# CLEANSEARCHURL 

22###################################################################### 

class CleanSearchURL:
    """
    Pretty search URL, e.g. ``/search/*-"ma collection"-qp``.

    The first part of the URL is the path. It is followed by the criteria and,
    as the very last element, by the encoding: one letter per criterion, in the
    same order. Criteria and encoding are joined with ``CleanSearchURL.separator``.
    """

    separator = "-"

    def __init__(self, base):
        self._base = base
        self._criteria = []
        self._encoding = []

    def append(self, criteria, type):
        """
        Add a criterion and its one-letter encoding.

        Raises RuntimeError when the criteria and encoding lists are out of sync.
        """
        if len(self._criteria) != len(self._encoding):
            # The former bare `raise` (outside any `except`) also produced a
            # RuntimeError; the type is kept and an explicit message is added.
            raise RuntimeError("CleanSearchURL: criteria and encoding are out of sync")
        self._criteria.append(criteria)
        self._encoding.append(type)

    def remove(self, criteria, type):
        """
        9/03/2023 - UNUSED.
        The class is used as an object only once in `pretty_search` method (helpers.py).

        Remove the first occurrence of the criterion and of the encoding letter.
        Raises RuntimeError when the criteria and encoding lists are out of sync,
        ValueError (from ``list.remove``) when one of the values is absent.
        """
        if len(self._criteria) != len(self._encoding):
            raise RuntimeError("CleanSearchURL: criteria and encoding are out of sync")
        self._criteria.remove(criteria)
        self._encoding.remove(type)

    @staticmethod
    def decode(clean_url: str, path="") -> tuple[str, QueryDict]:
        """
        Decode a pretty search url encoded like:
            search /1stterm-2ndterm - 3    - Name        - 1986      -abpfg
            path    all    + author + page + authorFacet + dateFacet + how the query is encoded

        Note: for NOT searches, the letter is uppercase.
        "q": qt=all
        "a": qt=author
        "b": qt=title
        "c": qt=author_ref
        "d": first date / last date of the search form (qt=date)
        "f": qt=references (bibliography)
        "g": qt=body (full text)
        "k": qt=kwd (keywords)
        "m": facet msc
        "n": facet author
        "o": facet year range
        "p": facet collection
        "r": first letter
        "s": facet document type
        "t": page
        "u": classname (article type)
        "y": facet year

        @param clean_url: criteria followed by the encoding, separated by
                          CleanSearchURL.separator
        @param path: path of the search, as it can be /search /thesis /series etc.
        @return path, QueryDict: the QueryDict looks like qt0: all, q0: 'thesearch' etc.
        @raise Http404: whenever the url cannot be decoded
        """
        separator = CleanSearchURL.separator
        # letter -> [query string template, regexp the criterion must match]
        my_dict = {
            "q": ["q{}={}&qt{}=all", r".*"],
            "a": ["q{}={}&qt{}=author", r"\D*"],
            "b": ["q{}={}&qt{}=title", r".*"],
            "c": ["q{}={}&qt{}=author_ref", r"\D*"],
            "d": ["q-f-{}={}&q-l-{}={}&qt{}=date", r"\[(\d{4}|\*|) TO (\d{4}|\*|)\]"],
            "f": ["q{}={}&qt{}=references", r".*"],
            "g": ["q{}={}&qt{}=body", ".*"],
            "k": ["q{}={}&qt{}=kwd", ".*"],
            "m": ["f=msc_facet:{}", r".*"],
            "n": ["f=ar:{}", r"\D*"],
            "o": ["f=year_facet:{}", r"\[\d{4} TO \d{4}\]"],
            "p": ["f=collection_title_facet:{}", r".*"],
            # "r": ["f={{!tag=firstletter}}firstNameFacetLetter:{}", r'\"?[A-Z]{1}\"?'],
            # FIXME: remove once we are sure the " around firstLetter are not needed
            "r": ["f={{!tag=firstletter}}firstNameFacetLetter:{}", r"[A-Z]{1}"],
            "s": ["f=dt:{}", r".*"],
            "t": ["page={}", r"\d*"],
            "u": ["f=classname:{}", r".*"],
            "y": ["f=year_facet:{}", r"\"\d{4}\""],
        }
        text_keys = ("q", "a", "b", "c", "f", "k", "g")

        q_index = 0
        criteria = []
        chaine = ""
        inQuote = False
        try:
            # Walk through clean_url to split the criteria, recognising the
            # strings protected by double quotes (they may contain the separator).
            for i, c in enumerate(clean_url):
                if inQuote or c != separator:
                    chaine += c

                if (
                    c == '"'
                    and (i == 0 or clean_url[i - 1] == separator)
                    and inQuote is False
                ):
                    # Start of a quoted criterion
                    inQuote = True
                elif c == '"' and clean_url[i + 1] == separator and inQuote is True:
                    # End of a quoted criterion.
                    # clean_url[i + 1] raises IndexError on a trailing quote: the
                    # encoding must come last, so the url is rejected (404 below).
                    criteria.append(chaine)
                    inQuote = False
                    chaine = ""
                elif c == separator and inQuote is False and clean_url[i - 1] != '"':
                    # End of an unquoted criterion
                    criteria.append(chaine)
                    chaine = ""

            criteria.append(chaine)

            # The encoding is the last element: one letter per criterion
            encoding = list(criteria[-1])
            criteria = criteria[:-1]

            query_string = ""
            # Build the solr-oriented query for each criterion
            for encoding_key, criteria_value in zip(encoding, criteria):
                if criteria_value == "":
                    continue
                key = encoding_key.lower()
                template, pattern = my_dict[key]
                # Criteria that do not respect their regexp are ignored
                if not re.match(pattern, criteria_value):
                    continue

                # Values must be url encoded to be passed to QueryDict
                if key == "d":
                    # Date interval
                    begin, end = criteria_value.strip("[]").split(" TO ")
                    query_string += "&" + template.format(
                        q_index, quote_plus(begin), q_index, quote_plus(end), q_index
                    )
                elif key in text_keys:
                    query_string += "&" + template.format(
                        q_index, quote_plus(criteria_value), q_index
                    )
                else:
                    # Facets and page are not indexed
                    query_string += "&" + template.format(quote_plus(criteria_value))
                    continue

                # Indexed criterion (text or date). `encode` shares the index and
                # the NOT flag between dates and text criteria, so both must be
                # handled here too, otherwise a date followed by another criterion
                # would collide on the same qt{index}.
                if key != encoding_key:
                    # Uppercase letter: NOT search
                    query_string += f"&not{q_index}=on"
                q_index += 1

            querydict = QueryDict(query_string.encode("utf-8"))
            return path, querydict
        except Exception as exc:
            # Any malformed url (unknown letter, bad date interval, ...) is a 404
            raise Http404() from exc

    @staticmethod
    def encode(dict: QueryDict, path="") -> str:
        """
        Encode a QueryDict request into a clean url.

        @param dict: request parameters (qt{i}, q{i}, q-f-{i}, q-l-{i}, not{i}, f, page)
        @param path: path of the search (/search, /thesis...)
        @return: clean search absolute url; `path` unchanged when there are no criteria
        """
        separator = CleanSearchURL.separator
        # NB: the `dict` parameter shadows the builtin, hence the literals below
        text_codes = {
            "all": "q",
            "author": "a",
            "author_ref": "c",
            "title": "b",
            "references": "f",
            "body": "g",
            "kwd": "k",
        }
        facet_codes = {
            "collection_title_facet": "p",
            "ar": "n",
            "{!tag=firstletter}firstNameFacetLetter": "r",
            "dt": "s",
            "classname": "u",  # Used for article types
            "msc_facet": "m",
        }

        criteria = []
        encoding = []

        # Search criteria: qt0, qt1, ... until the first missing index
        i = 0
        qti = dict.get("qt" + str(i), None)
        while qti:
            code = None
            if qti == "date":
                qfi = dict.get("q-f-" + str(i), "*")
                qli = dict.get("q-l-" + str(i), "*")
                criteria.append(f"[{qfi} TO {qli}]")
                code = "d"
            elif qti in text_codes:
                # A missing q{i} is encoded as an empty criterion (ignored by decode)
                criteria.append(dict.get("q" + str(i)) or "")
                code = text_codes[qti]

            # An unknown search type adds no criterion: its NOT flag must not
            # alter the encoding of the previous criterion.
            if code is not None:
                if dict.get("not" + str(i), None) == "on":
                    code = code.upper()
                encoding.append(code)

            i += 1
            qti = dict.get("qt" + str(i), None)

        # Filters (facets): "<key>:<value>"; unknown keys are ignored
        for facet_filter in dict.getlist("f"):
            key, _, value = facet_filter.partition(":")
            if key == "year_facet":
                criteria.append(value)
                # "[1990 TO 1999]" is a year range, anything else a single year
                encoding.append("o" if value.startswith("[") else "y")
            elif key in facet_codes:
                criteria.append(value)
                encoding.append(facet_codes[key])

        # Pagination
        page = dict.get("page")
        if page:
            criteria.append(page)
            encoding.append("t")

        if not criteria:
            return path

        def protect(item):
            # Criteria containing the separator or a quote are wrapped in quotes,
            # unless they are already protected
            already_protected = item and item[0] == '"' and item[-1] == '"'
            if not already_protected and (separator in item or '"' in item):
                return f'"{item}"'
            return item

        clean_url = (
            separator.join(quote(protect(item), "") for item in criteria)
            + separator
            + "".join(encoding)
        )
        path = path.strip("/")
        if path:
            return "/" + path + "/" + clean_url
        return clean_url

    def to_href(self):
        """Return the clean url: base, then the url-quoted criteria, then the encoding."""
        clean_url = (
            self._base
            + "/"
            + self.separator.join(quote(item, "") for item in self._criteria)
            + self.separator
            + "".join(self._encoding)
        )
        return clean_url

313 

314 

315###################################################################### 

316# FACETS & SEARCH RESULTS 

317###################################################################### 

class Facet:
    """
    Facet: a filter that you can select to narrow your search
    Example: "Journal article(25)" is a filter (of the colid_facets category)

    properties:
      name        Ex: "Journal article"
      count       Ex: 25
      active      The state given to the constructor.
                  Ex: "active" (the user has selected the filter), "not-active"
      sub_facets  List of nested facets (empty by default)
      href        The url to set on the filter.
                  It concatenates the filters selected before and is converted
                  to a clean search url with CleanSearchURL.encode

    Facets are returned in the SearchResultsGenerator
    """

    def __init__(
        self,
        name: str,
        count: int,
        state: str,
        filters: "set[str] | None" = None,
        path="/search/",
        sub_facets=None,
    ):
        """
        @param name: label of the facet
        @param count: number of hits for the facet
        @param state: stored as `self.active`
        @param filters: filters to put in the href; any falsy value means no filter
        @param path: search url; when filters are given, it must already hold a
                     query string (ex: /search/?q=test&qt=all)
        @param sub_facets: nested facets; a new list is created when omitted
        """
        self.name = name
        self.count = count
        self.active = state
        # No mutable default argument: each Facet gets its own list
        self.sub_facets = [] if sub_facets is None else sub_facets

        if filters:
            query = "&f=".join([quote_plus(x) for x in filters])
            # query looks like /search/?q=test&qt=all&f=....
            query = f"{path}&f={query}"
        else:
            query = path

        url = urlparse(query)
        params = QueryDict(url.query.encode("utf-8"), True, "utf-8")
        href = CleanSearchURL.encode(params, url.path)
        if hasattr(settings, "SITE_URL_PREFIX"):
            href = f"/{settings.SITE_URL_PREFIX}" + href
        self.href = href

361 

362 

def create_facets_in_category(
    solr_results: Results,
    category: str,
    active_filters: set[str],
    path: str,
    sort=False,
    reverse=False,
) -> list[Facet]:
    """
    Create the Facet objects of one category (one solr facet field).

    @param solr_results: solr results; only `facets["facet_fields"]` is read
    @param category: name of the facet field (ex: "ar", "dt", "year_facet", "sites")
    @param active_filters: filters currently selected; the set is not modified
    @param path: search url given to each Facet
    @param sort: sort the facets by name
    @param reverse: reverse the sort order (only used when `sort` is set)
    @return: one Facet per value returned by solr, [] if the category is unknown
    """
    # Solr returns filters in a flat list
    # Example: facet_fields : { "year": [ "1878",1,"1879",0,"1912",3,"1971",5] }
    # To simplify the creation of Facet objects, we need a list of pairs:
    #     [ ('1878',1), ('1879',0), ... ]
    # To do so, we use
    #   1) the python slice syntax on lists [start:stop:step]
    #        f[0::2] => [ '1878', '1879', ... ]   (names)
    #        f[1::2] => [ 1, 0, ... ]             (counts)
    #   2) zip() to pair up the elements of both lists

    facet_fields = solr_results.facets["facet_fields"]
    if category not in facet_fields:
        return []

    f = facet_fields[category]
    solr_facets = list(zip(f[0::2], f[1::2]))

    if sort:
        solr_facets = sorted(solr_facets, key=lambda x: x[0], reverse=reverse)

    results = []
    active_filters = active_filters.copy()

    # Filters are "<field>:<value>": match on the prefix, a substring test would
    # also catch unrelated filters whose value contains "ar:" or "year_facet:["

    if category == "year_facet":
        # Selecting a year facet clears the active year range facet (if any)
        year_range_filters = [v for v in active_filters if v.startswith("year_facet:[")]
        if year_range_filters:
            active_filters.remove(year_range_filters[0])

    ar_active_filter = None
    if category == "ar":
        ar_active_filter = next((v for v in active_filters if v.startswith("ar:")), None)

    for facet_name, count in solr_facets:
        this_filters = active_filters.copy()
        v = '{}:"{}"'.format(category, facet_name.replace('"', '\\"'))
        if category == "sites":
            # Solr holds the site id: display the site name instead
            facet_name = [
                SITE_REGISTER[key]["name"]
                for key in SITE_REGISTER
                if str(SITE_REGISTER[key]["site_id"]) == facet_name
            ][0]
        if v in active_filters:
            # The href of an active facet removes its filter
            this_filters.remove(v)
            results.append(Facet(facet_name, count, "active", this_filters, path))
        else:
            # Multiple selection of author facets is not allowed:
            # the new author replaces the active one
            if ar_active_filter is not None:
                this_filters.remove(ar_active_filter)
            this_filters.add(v)
            results.append(Facet(facet_name, count, "not-active", this_filters, path))

    return results

438 

439 

def create_year_range_facets(
    solr_results: Results, year_facets: list[Facet], active_filters: set[str], path: str
) -> list[Facet]:
    """
    Create the year range facets ("1990-1999") from the solr range facet "year_facet".

    The year facets found inside a range become the sub facets of that range.
    Ranges are handled from the most recent to the oldest and `year_facets` is
    consumed in order with a single cursor.
    # NOTE(review): this presumes year_facets is sorted by decreasing year
    # (create_facets passes sort=True, reverse=True) - confirm for other callers.

    @param solr_results: solr results; only `facets["facet_ranges"]["year_facet"]` is read
    @param year_facets: facets of the single years
    @param active_filters: filters currently selected; the set is not modified
    @param path: search url given to each Facet
    @return: one Facet per range returned by solr
    """
    year_ranges = solr_results.facets["facet_ranges"]["year_facet"]
    gap = year_ranges["gap"]
    counts = year_ranges["counts"]
    # counts is a flat list [start, count, start, count, ...]
    ranges = sorted(zip(counts[0::2], counts[1::2]), key=lambda x: x[0], reverse=True)

    current_year = datetime.datetime.now().year
    total = len(year_facets)
    cursor = 0  # index of the current year facet
    results = []

    for start, count in ranges:
        start_i = int(start)
        end_i = int(start) + gap - 1
        # A range never ends after the current year
        end = str(current_year) if end_i > current_year else str(end_i)

        sub_year_facets = []
        this_filters = active_filters.copy()

        # Collect the year facets that are inside the range
        while cursor < total and int(year_facets[cursor].name) >= start_i:
            year_facet = year_facets[cursor]
            sub_year_facets.append(year_facet)

            # If we click on a year range facet, we clear the active year facet
            # (if any)
            year_filter = 'year_facet:"' + year_facet.name + '"'
            if year_filter in active_filters:
                this_filters.remove(year_filter)

            cursor += 1

        label = start + "-" + str(end)
        range_filter = "year_facet:[" + start + " TO " + end + "]"

        if range_filter in active_filters:
            this_filters.remove(range_filter)
            state = "active"
        else:
            this_filters.add(range_filter)
            state = "not-active"
        results.append(Facet(label, count, state, this_filters, path, sub_year_facets))

    return results

500 

501 

def create_facets(
    solr_results: Results, path: str, filters: list[str], use_ar_facet=True
) -> dict[str, list[Facet]]:
    """
    Create all the facets of a search result, grouped by category.

    @param solr_results: solr results
    @param path: search url given to each Facet
    @param filters: filters currently selected
    @param use_ar_facet: when False, "author_facets" is an empty list
    @return: dict with the keys author_facets, msc_facets, year_range_facets,
             dt_facets, atype_facets, collection_facets, sites_facets
    """
    active_filters = set(filters)

    def in_category(category, **options):
        return create_facets_in_category(
            solr_results, category, active_filters, path, **options
        )

    atype_facets = in_category("classname")
    author_facets = in_category("ar") if use_ar_facet else []
    dt_facets = in_category("dt")
    msc_facets = in_category("msc_facet")
    collection_facets = in_category("collection_title_facet")
    sites_facets = in_category("sites")
    year_facets = in_category("year_facet", sort=True, reverse=True)

    only_year_is_active = len(year_facets) == 1 and year_facets[0].active == "active"
    if only_year_is_active:
        # A single, selected year: no need to group the years by range
        year_range_facets = year_facets
    else:
        year_range_facets = create_year_range_facets(
            solr_results, year_facets, active_filters, path
        )

    return {
        "author_facets": author_facets,
        "msc_facets": msc_facets,
        "year_range_facets": year_range_facets,
        "dt_facets": dt_facets,
        "atype_facets": atype_facets,
        "collection_facets": collection_facets,
        "sites_facets": sites_facets,
    }

537 

538 

class SearchResults:
    """
    Search results.
    Hold data returned by Solr
    Intermediary between solr_results and the Django template to display the results
    """

    # def __init__(self, solr_results, path, filters, sort): -> if sorting
    # gets enabled

    def fix_truncated_value(self, value: str):
        """
        Highlighting may produce an HTML string truncated at the end.
        To display the search keywords in bold, we add <strong> around them.
        But we ask the template to display the highlight as |safe such that
        unclosed HTML tags will damage the HTML page layout.
        => fix_truncated_value attempts to add the missing HTML end tags
        (and the missing closing quote, if any).

        9/03/2023 - This cannot work properly. We should use a parser or something
        to correctly do this.
        """
        open_tags = []  # stack of the tags still to close
        quote = ""  # quote character we are currently inside, if any
        size = len(value)
        pos = 0
        while pos < size:
            char = value[pos]
            if char in ('"', "'"):
                # Same quote again: we leave it, otherwise we enter it
                quote = "" if quote == char else char

            if not quote and char == "<":
                pos += 1

                is_end_tag = pos < size and value[pos] == "/"
                if is_end_tag:
                    pos += 1

                # The tag name stops at the first space or ">"
                name_start = pos
                while pos < size and value[pos] != " " and value[pos] != ">":
                    pos += 1
                tag = value[name_start:pos]

                if tag and is_end_tag:
                    if open_tags and open_tags[-1] == tag:
                        open_tags.pop()
                elif tag:
                    open_tags.append(tag)

            pos += 1

        if quote:
            value += quote

        # Close the remaining tags, innermost first
        while open_tags:
            value += "</" + open_tags.pop() + ">"

        return value

    def __init__(self, solr_results: Results, path: str, filters: list[str], use_ar_facet=True):
        """
        @param solr_results: solr results (hits, docs, highlighting, facets)
        @param path: search url given to the facets
        @param filters: filters currently selected
        @param use_ar_facet: passed on to create_facets
        """
        self.facets = create_facets(solr_results, path, filters, use_ar_facet)
        self.hits = solr_results.hits
        self.docs = solr_results.docs

        # Fields to look for a highlight, by order of preference:
        # the first one found wins
        if translation.get_language() != "fr":
            preferred_highlight_keywords = [
                "trans_abstract_tex",
                "abstract_tex",
                "trans_kwd",
                "kwd",
                "body",
                "bibitem",
            ]
        else:
            preferred_highlight_keywords = [
                "abstract_tex",
                "trans_abstract_tex",
                "kwd",
                "trans_kwd",
                "body",
                "bibitem",
            ]

        # We do not call the translation mechanism on a specific language:
        # we get the translation based on the current language
        abstract_text = translation.gettext("Résumé")
        reference_text = translation.gettext("Bibliographie")
        keywords_text = translation.gettext("Mots clés")
        fulltext_text = translation.gettext("Plein texte")

        correspondance = {
            "abstract_tex": abstract_text,
            "trans_abstract_tex": abstract_text,
            "kwd": keywords_text,
            "trans_kwd": keywords_text,
            "body": fulltext_text,
            "bibitem": reference_text,
        }

        for index, doc in enumerate(self.docs):
            id_doc = doc["id"]
            doc["embargo"] = resolver.embargo(doc["wall"], doc["year"])
            hl = solr_results.highlighting[id_doc]

            # Put the <strong> of the highlight inside the displayed value
            for key in ("au", "year"):
                if key not in hl:
                    continue
                highlighted = hl[key][0]
                plain = highlighted.replace("<strong>", "").replace("</strong>", "")
                value = doc[key]
                if value.find(plain) > -1:
                    value = value.replace(plain, highlighted)
                doc[key] = value

            # Only the first collection title is kept, highlighted or not
            for key in ("collection_title_tex", "collection_title_html"):
                value = doc[key][0]
                if key in hl:
                    highlighted = hl[key][0]
                    plain = highlighted.replace("<strong>", "").replace("</strong>", "")
                    if value.find(plain) > -1:
                        value = value.replace(plain, highlighted)
                doc[key] = value

            for key in preferred_highlight_keywords:
                if key not in hl:
                    continue
                snippets = hl[key]
                if key == "bibitem":
                    snippets = [self.fix_truncated_value(snippet) for snippet in snippets]
                doc["highlighting"] = {
                    "field": correspondance[key],
                    "value": "".join("... " + snippet + " ...<br>" for snippet in snippets),
                }
                break
                # TODO: we want the highlight on bibitem, or even on the full text,
                # only if nothing else matches

            if settings.SITE_NAME == "cr" and "sites" in doc and doc["sites"]:
                site_id = doc["sites"][0]
                site_domain = [
                    SITE_REGISTER[key]["site_domain"]
                    for key in SITE_REGISTER
                    if SITE_REGISTER[key]["site_id"] == site_id
                ][0]
                # The url prefix is the 2nd element of the site domain split on "/"
                prefix = "/" + site_domain.split("/")[1]
                if "doi" in doc:
                    url = reverse("article", kwargs={"aid": doc["doi"]})
                else:
                    url = reverse("item_id", kwargs={"pid": doc["pid"]})
                doc["item_url"] = prefix + url
                if not doc["pdf"].startswith(prefix):
                    doc["pdf"] = prefix + doc["pdf"]
                    if "tex" in doc:
                        doc["tex"] = prefix + doc["tex"]
            elif hasattr(settings, "SITE_URL_PREFIX"):
                prefix = "/" + settings.SITE_URL_PREFIX
                if not doc["pdf"].startswith(prefix):
                    doc["pdf"] = prefix + doc["pdf"]
                    if "tex" in doc:
                        doc["tex"] = prefix + doc["tex"]

            self.docs[index] = doc

        self.filters = "&f=".join(filters)
        # self.sort = sort -> if sorting gets enabled

722 

723 

class SearchInternalResults:
    """
    Search results for sorted Books.
    Hold data returned by Solr
    Intermediary between solr_results and the Django template to display the results
    """

    # def __init__(self, solr_results, path, filters, sort): -> if sorting
    # gets enabled
    def __init__(
        self, solr_results: Results, path: str, filters: list[str], facet_fields: list[str]
    ):
        """
        @param solr_results: solr results (hits, docs, facets)
        @param path: search url given to the facets
        @param filters: filters currently selected
        @param facet_fields: facets to create, among "collection_title_facet",
                             "author_facet", "year_facet" and "firstLetter".
                             The facets that are not requested are set to None.
        """
        first_letter_prefix = "{!tag=firstletter}firstNameFacetLetter:"

        year_range_facets = None
        letter_facets = None
        collection_facets = None
        author_facets = None

        this_filters = set(filters)

        if "collection_title_facet" in facet_fields:
            collection_facets = create_facets_in_category(
                solr_results, "collection_title_facet", this_filters, path
            )

        if "author_facet" in facet_fields:
            author_facets = create_facets_in_category(solr_results, "ar", this_filters, path)

        if "year_facet" in facet_fields:
            year_facets = create_facets_in_category(
                solr_results, "year_facet", this_filters, path, sort=True, reverse=True
            )
            if len(year_facets) == 1 and year_facets[0].active == "active":
                year_range_facets = year_facets
            else:
                year_range_facets = create_year_range_facets(
                    solr_results, year_facets, this_filters, path
                )

        if "firstLetter" in facet_fields:
            # The first letter filter is removed from the filters of the letter
            # facets: each letter facet sets its own.
            # discard (not remove): `filters` may hold the same filter twice
            # whereas this_filters is a set.
            active_letter_filter = ""
            for current_filter in filters:
                if current_filter.startswith(first_letter_prefix):
                    this_filters.discard(current_filter)
                    active_letter_filter = current_filter

            # Solr returns a flat list [letter, count, letter, count, ...]
            f = solr_results.facets["facet_fields"]["firstNameFacetLetter"]
            solr_facets = dict(zip(f[0::2], f[1::2]))

            letter_facets = []
            no_letter_selected = True
            for lettre in string.ascii_uppercase:
                v = f"{{!tag=firstletter}}firstNameFacetLetter:{lettre}"
                if lettre not in solr_facets:
                    # No result for this letter
                    letter_facets.append(Facet(lettre, 0, "disabled", "", path))
                elif v == active_letter_filter:
                    # The letter is in the search results and its filter is active
                    letter_facets.append(
                        Facet(lettre, solr_facets[lettre], "active", this_filters, path)
                    )
                    no_letter_selected = False
                else:
                    my_filters = this_filters.copy()
                    my_filters.add(v)
                    letter_facets.append(
                        Facet(lettre, solr_facets[lettre], "not-active", my_filters, path)
                    )

            # "All" clears the first letter filter; it is active when no letter is
            all_state = "active" if no_letter_selected else "not-active"
            letter_facets.append(Facet("All", 1, all_state, this_filters, path))

        self.facets = {
            "year_range_facets": year_range_facets,
            "letter_facets": letter_facets,
            "collection_title_facets": collection_facets,
            "author_facets": author_facets,
        }

        self.hits = solr_results.hits
        self.docs = solr_results.docs
        self.filters = "&f=".join(filters)
        # self.sort = sort -> if sorting gets enabled

        for index, doc in enumerate(self.docs):
            doc["embargo"] = resolver.embargo(doc["wall"], doc["year"])
            self.docs[index] = doc