Coverage for apps/ptf/cmds/solr_cmds.py: 84%

463 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-19 19:20 +0000

1import pysolr 

2 

3from django.conf import settings 

4 

5from ptf.cmds.base_cmds import baseCmd 

6from ptf.cmds.base_cmds import make_int 

7from ptf.display import resolver 

8from ptf.site_register import SITE_REGISTER 

9from ptf.solr import search_helpers 

10from ptf.utils import get_display_name 

11 

12# Not used so far. 

13# nlm2solr use normalize-space for volume and volume-series, 

14# but make_int is called to convert into int: spaces are also trimmed 

15# def normalize_whitespace(str): 

16# import re 

17# str = str.strip() 

18# str = re.sub(r'\s+', ' ', str) 

19# return str 

20 

21 

class solrFactory:
    """Holder for a single, lazily created ``pysolr.Solr`` client.

    The client is cached on the class (``solr``) and built on first use from
    ``solr_url``, which itself falls back to ``settings.SOLR_URL``.
    """

    # Cached pysolr.Solr instance; created on demand by get_solr().
    solr = None
    # URL used to build the client; filled from settings.SOLR_URL when unset.
    solr_url = None

    @staticmethod
    def get_solr():
        """Return the cached Solr client, creating it on the first call."""
        if solrFactory.solr is not None:
            return solrFactory.solr

        if solrFactory.solr_url is None:
            solrFactory.solr_url = settings.SOLR_URL
        solrFactory.solr = pysolr.Solr(solrFactory.solr_url, timeout=10)
        return solrFactory.solr

    @staticmethod
    def do_solr_commit():
        """Commit on the Solr client unless ``settings.IGNORE_SOLR`` is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        solrFactory.get_solr().commit()

    @staticmethod
    def do_solr_rollback():
        """Send a ``<rollback />`` update unless ``settings.IGNORE_SOLR`` is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        # The rollback message goes through pysolr's private _update() method.
        solrFactory.get_solr()._update("<rollback />")

    @staticmethod
    def reset():
        """Close the cached client's session, if any, and drop the client."""
        cached = solrFactory.solr
        if cached:
            cached.get_session().close()
        solrFactory.solr = None

56 

57 

def solr_add_contributors_to_data(contributors, data):
    """Fill the author-related fields of ``data`` from ``contributors``.

    Only contributors whose ``role`` is ``author``, ``editor`` or
    ``translator`` are used. ``data`` is modified in place:

    - ``au``: display names joined with ``"; "``
    - ``ar``: reference names (the contributor's ``mid`` when truthy,
      otherwise the display name)
    - ``aul``: last names
    - ``fau``: first display name, set only when at least one exists

    Nothing is written when ``contributors`` is ``None``.
    """
    if contributors is None:
        return

    display_names = []
    reference_names = []
    last_names = []

    for contributor in contributors:
        if contributor["role"] not in ["author", "editor", "translator"]:
            continue

        display_name = get_display_name(
            "",
            contributor["first_name"],
            contributor["last_name"],
            "",
            contributor["string_name"],
        )
        reference_name = contributor["mid"] or display_name

        if reference_name:
            reference_names.append(reference_name)
        if display_name:
            display_names.append(display_name)
        if contributor["last_name"]:
            last_names.append(contributor["last_name"])

    data["au"] = "; ".join(display_names)
    # Names used for author references
    data["ar"] = reference_names
    # Surnames / last names
    data["aul"] = last_names

    if display_names:
        data["fau"] = display_names[0]

86 

87 

def solr_add_kwds_to_data(kwds, data):
    """Split keywords into the ``kwd``, ``trans_kwd`` and ``msc`` fields of ``data``.

    Each keyword is a mapping with ``type``, ``lang`` and ``value`` keys.
    Keywords of type ``msc`` go, upper-cased, into the ``msc`` list. The
    others are joined with ``", "`` into ``kwd`` when their ``lang`` is
    ``fr`` and into ``trans_kwd`` otherwise.
    """
    french_values = []
    other_values = []
    msc_codes = []

    for keyword in kwds:
        if keyword["type"] == "msc":
            msc_codes.append(keyword["value"].upper())
        elif keyword["lang"] == "fr":
            french_values.append(keyword["value"])
        else:
            other_values.append(keyword["value"])

    data["kwd"] = ", ".join(french_values)
    data["trans_kwd"] = ", ".join(other_values)
    data["msc"] = msc_codes

96 

97 

98##################################################################### 

99# 

100# solrCmd: base class for Solr commands 

101# 

102###################################################################### 

class solrCmd(baseCmd):
    """Base class for Solr commands.

    ``do`` and ``undo`` return ``None`` without doing anything when
    ``settings.IGNORE_SOLR`` exists and is truthy; otherwise they defer to
    ``baseCmd``.
    """

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters handed to ``baseCmd``.
                ``None`` (the default) is replaced by a new empty dict so that
                no dict instance is shared between calls.
        """
        super().__init__({} if params is None else params)

    def do(self, parent=None):
        """Run the command, or return ``None`` when Solr is ignored."""
        if getattr(settings, "IGNORE_SOLR", False):
            return None

        return super().do(parent)

    def post_do(self, resource=None):
        """Delegate to ``baseCmd.post_do``."""
        super().post_do(resource)

    def undo(self):
        """Undo the command, or return ``None`` when Solr is ignored."""
        if getattr(settings, "IGNORE_SOLR", False):
            return None

        return super().undo()

121 

122 

123##################################################################### 

124# 

125# solrDeleteCmd: generic to delete Solr documents, based on a query 

126# 

127###################################################################### 

class solrDeleteCmd(solrCmd):
    """Delete the Solr documents matching the query ``q``."""

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters; ``q`` is registered as
                required. ``None`` is replaced by a new empty dict instead of
                relying on a shared mutable default.
        """
        # Defaults are assigned before super().__init__() -- presumably so the
        # base class can override them from ``params``; confirm in baseCmd.
        self.commit = True
        self.q = None

        super().__init__({} if params is None else params)

        self.required_params.extend(["q"])

    def internal_do(self):
        """Delete by query, passing ``self.commit`` as the commit flag."""
        super().internal_do()

        solrFactory.get_solr().delete(q=self.q, commit=self.commit)

        return None

143 

144 

145##################################################################### 

146# 

147# solrAddCmd: base class for Solr Add commands 

148# 

149###################################################################### 

class solrAddCmd(solrCmd):
    """Base class for commands that add one document to Solr.

    ``self.data`` is the document sent to Solr. ``pre_do`` seeds it with
    ``id`` and ``pid``; subclasses add the remaining fields.
    """

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters; ``id`` and ``pid`` are
                registered as required (``id`` also for deletion). ``None``
                is replaced by a new empty dict instead of relying on a
                shared mutable default.
        """
        # Defaults are assigned before super().__init__() -- presumably so the
        # base class can override them from ``params``; confirm in baseCmd.
        self.commit = True
        self.db_obj = None
        self.id = None
        self.pid = None
        self.data = {}

        super().__init__({} if params is None else params)

        self.required_params.extend(["id", "pid"])
        self.required_delete_params.extend(["id"])

    def pre_do(self):
        """Seed ``self.data`` and remove any existing document with the same PID."""
        super().pre_do()

        self.data["id"] = self.id
        self.data["pid"] = self.pid
        # After errors and/or simultaneous uploads, several records can exist
        # for one PID. To avoid several search results for a single PID,
        # everything is deleted before internal_do.
        solrDeleteCmd({"q": "pid:" + self.pid}).do()

    def internal_do(self):
        """Send ``self.data`` to Solr as a single document."""
        super().internal_do()

        solrFactory.get_solr().add(docs=[self.data], commit=self.commit)

        return None

    def internal_undo(self):
        """Delete the document by id and return the base class result."""
        # Named ``result`` to avoid shadowing the builtin ``id``.
        result = super().internal_undo()

        solrFactory.get_solr().delete(id=self.id, commit=self.commit)

        return result

188 

189 

190##################################################################### 

191# 

192# addResourceSolrCmd: base class for solrAddCmds adding a Resource 

193# 

194###################################################################### 

class addResourceSolrCmd(solrAddCmd):
    """Base class for the commands that index a resource described by ``xobj``.

    ``pre_do`` copies the attributes named in ``self.fields`` from ``xobj``
    into ``self.data``, then adds abstracts, keywords, contributors, the year
    facet, binary file links and the site ids.
    """

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters; ``xobj`` is registered as
                required. ``None`` is replaced by a new empty dict instead of
                relying on a shared mutable default.
        """
        self.xobj = None  # model_data object

        # Fields of the xobj to pass to Solr
        self.fields = [
            "lang",
            "doi",
            "title_tex",
            "title_html",
            "trans_title_tex",
            "trans_title_html",
            "abstract_tex",
            "abstract_html",
            "trans_abstract_tex",
            "trans_abstract_html",
            "collection_title_tex",
            "collection_title_html",
            "collection_id",
            "year",
            "body",
            "bibitem",
        ]

        # Used to filter the articles based on their site
        self.sites = None

        super().__init__({} if params is None else params)

        self.required_params.extend(["xobj"])

    def add_collection(self, collection):
        """Record the collection id, titles and document type in ``self.data``.

        The titles are appended to list-valued fields. ``dt`` is always set
        from ``collection.coltype``; ``classname`` is set for every coltype
        except ``journal`` and ``acta``.
        """
        self.data["collection_id"] = collection.id

        self.data.setdefault("collection_title_tex", []).append(collection.title_tex)
        self.data.setdefault("collection_title_html", []).append(collection.title_html)

        # classname is used only by PCJ for the article types
        if collection.coltype == "journal":
            self.data["dt"] = ["Article de revue"]
        elif collection.coltype == "acta":
            self.data["dt"] = ["Acte de séminaire"]
        else:
            labels = {
                "thesis": "Thèse",
                "lecture-notes": "Notes de cours",
                "proceeding": "Acte de rencontre",
            }
            # Any other collection type is indexed as a book.
            label = labels.get(collection.coltype, "Livre")
            self.data["classname"] = label
            self.data["dt"] = [label]

    def add_abstracts_to_data(self):
        """Copy each abstract of ``xobj`` into the matching Solr field.

        Abstracts whose ``lang`` is ``fr`` go to ``abstract_tex`` and
        ``abstract_html``; all others go to the ``trans_``-prefixed fields. A
        later abstract overwrites an earlier one that maps to the same field.
        """
        for abstract in self.xobj.abstracts:
            prefix = "" if abstract["lang"] == "fr" else "trans_"

            for field_type in ["tex", "html"]:
                self.data[prefix + "abstract_" + field_type] = abstract["value_" + field_type]

    def add_year_to_data(self, year):
        """Set ``year_facet`` from ``year``; do nothing when ``year`` is falsy.

        For a value containing ``-`` the part after the first ``-`` is used.
        """
        if not year:
            return

        years = str(year).split("-")
        self.data["year_facet"] = int(years[1]) if len(years) > 1 else int(year)

    def pre_do(self):
        """Build the Solr document in ``self.data``.

        Raises:
            ValueError: if no ``dt`` (document type) has been set.
        """
        super().pre_do()

        for field in self.fields:
            if hasattr(self.xobj, field):
                self.data[field] = getattr(self.xobj, field)

        self.add_abstracts_to_data()
        solr_add_kwds_to_data(self.xobj.kwds, self.data)
        solr_add_contributors_to_data(self.xobj.contributors, self.data)

        if "dt" not in self.data:
            raise ValueError(f"add SolR resource without dt - {self.xobj.pid}")

        # year either comes directly from xobj (container) or from set_container
        self.add_year_to_data(self.data["year"])

        if self.db_obj is not None:
            solr_fields = {
                "application/pdf": "pdf",
                "image/x.djvu": "djvu",
                "application/x-tex": "tex",
            }
            for stream in self.xobj.streams:
                mimetype = stream["mimetype"]
                if mimetype in solr_fields:
                    href = self.db_obj.get_binary_file_href_full_path(
                        "self", mimetype, stream["location"]
                    )
                    self.data[solr_fields[mimetype]] = href

            self.data["wall"] = self.db_obj.get_wall()

        self.data["sites"] = self.sites if self.sites else [settings.SITE_ID]

315 

316 

317##################################################################### 

318# 

319# addContainerSolrCmd: adds/remove a container (issue/book) 

320# 

321# A container needs a collection (collection_title_tex etc.) 

322# 

323###################################################################### 

class addContainerSolrCmd(addResourceSolrCmd):
    """Index a container (issue or book).

    The document type (``dt``) is not set here; it is set by
    ``add_collection``.
    """

    def __init__(self, params=None):
        """Initialise the command and add ``ctype`` to the copied fields.

        Args:
            params: mapping of command parameters. ``None`` is replaced by a
                new empty dict instead of relying on a shared mutable default.
        """
        super().__init__({} if params is None else params)

        self.fields.extend(["ctype"])

    def pre_do(self):
        """Add the integer ``volume``, ``number`` and ``vseries`` fields."""
        super().pre_do()

        for field in ["volume", "number", "vseries"]:
            if hasattr(self.xobj, field):
                # Each value goes to its own field. Previously every value
                # was written to data["volume"], so the last attribute won.
                self.data[field] = make_int(getattr(self.xobj, field))

        if hasattr(self.xobj, "incollection") and len(self.xobj.incollection) > 0:
            # Only the first "incollection" entry is used: its volume becomes
            # the number, and the volume is forced to 0.
            incol = self.xobj.incollection[0]
            self.data["vseries"] = make_int(incol.vseries)
            self.data["volume"] = 0
            self.data["number"] = make_int(incol.volume)

343 

344 # if incol.coltype == "theses": 

345 # self.data["dt"] = ["Thèse"] 

346 

347 

348##################################################################### 

349# 

350# addArticleSolrCmd: adds/remove an article 

351# 

352# an article needs a container (container_id) that needs a collection (collection_id) 

353# 

354###################################################################### 

class addArticleSolrCmd(addResourceSolrCmd):
    """Index an article.

    An article needs a container (``set_container``), which in turn needs a
    collection (``add_collection``, which also sets ``dt``).
    """

    def __init__(self, params=None):
        """Initialise the command and extend the copied fields.

        Args:
            params: mapping of command parameters. ``None`` is replaced by a
                new empty dict instead of relying on a shared mutable default.
        """
        super().__init__({} if params is None else params)

        self.fields.extend(
            ["page_range", "container_id", "volume", "number", "vseries", "article_number"]
        )

    def set_container(self, container):
        """Copy the container's id, year and numbering into ``self.data``."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["vseries"] = make_int(container.vseries)
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)

    def set_eprint(self, eprint):
        """Tag the document as an e-print; ``dt`` must already be set."""
        self.data["dt"].append("e-print")

    def set_source(self, source):
        """Do nothing; the source is not indexed."""
        pass

    def set_thesis(self, thesis):
        """Tag the document as a thesis; ``dt`` must already be set."""
        self.data["dt"].append("thesis")

    def set_original_article(self, article):
        """Do nothing for now."""
        # TODO Replace some data (ie doi, pid) with the original article
        pass

    def pre_do(self):
        """Build the document, then set ``classname`` and ``page_range``."""
        super().pre_do()

        self.data["classname"] = resolver.ARTICLE_TYPES.get(
            self.xobj.atype, "Article de recherche"
        )

        page_range = self.xobj.page_range
        if not page_range:
            # Build the range from the first and last pages.
            text = "p. "
            if self.xobj.fpage is not None:
                text += self.xobj.fpage
            if self.xobj.fpage and self.xobj.lpage:
                text += "-"
            if self.xobj.lpage is not None:
                text += self.xobj.lpage
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # The value already starts with "p": keep it. Previously it was
            # replaced by an empty string.
            text = page_range
        self.data["page_range"] = text

402 

403 

404##################################################################### 

405# 

406# addBookPartSolrCmd: adds/removes a book part (similar to an article) 

407# 

408# a book part needs a collection id (array) 

409# 

410###################################################################### 

class addBookPartSolrCmd(addResourceSolrCmd):
    """Index a book part (similar to an article).

    The document type (``dt``) is not set here; it is set by
    ``add_collection``.
    """

    def __init__(self, params=None):
        """Initialise the command and extend the copied fields.

        Args:
            params: mapping of command parameters. ``None`` is replaced by a
                new empty dict instead of relying on a shared mutable default.
        """
        super().__init__({} if params is None else params)

        self.fields.extend(
            ["page_range", "container_title_tex", "container_title_html", "volume", "number"]
        )

    def set_container(self, container):
        """Copy the container's id, year, numbering and titles into ``self.data``."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)
        self.data["container_title_tex"] = container.title_tex
        self.data["container_title_html"] = container.title_html

    def pre_do(self):
        """Build the document, then set ``classname`` and ``page_range``."""
        super().pre_do()

        self.data["classname"] = "Chapitre de livre"

        page_range = self.xobj.page_range
        if not page_range:
            # Build the range from the first and last pages.
            text = "p. "
            if self.xobj.fpage is not None:
                text += self.xobj.fpage
            if self.xobj.fpage and self.xobj.lpage:
                text += "-"
            if self.xobj.lpage is not None:
                text += self.xobj.lpage
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # The value already starts with "p": keep it. Previously it was
            # replaced by an empty string.
            text = page_range
        self.data["page_range"] = text

444 

445 

446##################################################################### 

447# 

448# solrSearchCmd: 

449# 

450# called from ptf/views.py; SolrRequest(request, q, alias=alias, 

451# site=site, 

452# default={'sort': '-score'}) 

453# 

454# Warning: As of July 2018, only 1 site id is stored in a SolR document 

455# Although the SolR schema is already OK to store multiple sites ("sites" is an array) 

456# no Solr commands have been written to add/remove sites 

457# We only have add commands. 

458# Search only works if the Solr instance is meant for individual or ALL sites 

459# 

460###################################################################### 

class solrSearchCmd(solrCmd):
    """Run a faceted, highlighted edismax search built from ``self.qs``.

    ``qs`` is a list of mappings with ``name``, ``value``, ``not``, ``first``
    and ``last`` keys; each one is turned into a query fragment by ``get_q``.
    """

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters; ``qs`` is registered as
                required. ``None`` is replaced by a new empty dict instead of
                relying on a shared mutable default.
        """
        self.q = ""
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = "-score"  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""

        super().__init__({} if params is None else params)

        self.required_params.extend(["qs"])

    def get_q(self, name, value, exclude, first, last):
        """Return the Solr query fragment for one search criterion.

        Args:
            name: criterion name (``all``, ``date``, ``author``,
                ``author_ref``, ``title``, ``body``, ``references``, ``kwd``;
                any other name yields no field prefix).
            value: searched text. ``"*"`` is treated as empty, except with
                ``all`` where the result is ``*:*``.
            exclude: when truthy the fragment is prefixed with ``-``.
            first: lower bound of the year range (``date`` only).
            last: upper bound of the year range (``date`` only).
        """
        if name == "all" and value == "*":
            return "*:*"

        if value == "*":
            value = ""

        q = "-" if exclude else ""

        if name == "date":
            return q + "year:[" + first + " TO " + last + "]"

        field_prefixes = {
            "author": "au:",
            "author_ref": "ar:",
            "title": "title_tex:",
            "body": "body:",
            "references": "bibitem:",
            "kwd": "kwd:",
        }
        q += field_prefixes.get(name, "")

        if len(value) > 0 and value[0] == '"' and value[-1] == '"':
            # A quoted phrase is passed through unchanged.
            q += value
        else:
            q += "(" + " AND ".join(value.split()) + ")"

        return q

    def _build_sort(self):
        """Translate ``self.sort`` (``-field,field``) into a Solr sort string."""
        if not self.sort:
            return "score desc"

        sorts = []
        for spec in self.sort.split(","):
            spec = spec.strip()
            if not spec:
                # Stray comma (for example "a,"): skip it rather than fail
                # with IndexError on spec[0].
                continue
            if spec.startswith("-"):
                sorts.append(f"{spec[1:]} desc")
            else:
                sorts.append(f"{spec} asc")
        sorts.append("year desc")
        return ", ".join(sorts)

    def internal_do(self) -> search_helpers.SearchResults:
        """Execute the search and wrap the response in ``SearchResults``.

        A site filter is appended to ``self.filters`` depending on
        ``settings.COLLECTION_PID``.
        """
        super().internal_do()

        if settings.COLLECTION_PID == "CR":
            cr_ids = ["CRMATH", "CRMECA", "CRPHYS", "CRCHIM", "CRGEOS", "CRBIOL"]
            ids = [SITE_REGISTER[item.lower()]["site_id"] for item in cr_ids]
            self.filters.append(f"sites:[{min(ids)} TO {max(ids)}]")
        elif settings.COLLECTION_PID != "ALL":
            self.filters.append(f"sites:{settings.SITE_ID}")

        sort = self._build_sort()

        # The "ar" facet is dropped as soon as one criterion is "author_ref".
        use_ar_facet = True
        q = ""
        if self.qs:
            for qi in self.qs:
                if qi["name"] == "author_ref":
                    use_ar_facet = False
                if qi["value"] or qi["first"]:
                    new_q = self.get_q(qi["name"], qi["value"], qi["not"], qi["first"], qi["last"])
                    q += new_q + " "
        if q:
            self.q = q

        facet_fields = ["collection_title_facet", "msc_facet", "dt", "year_facet"]

        if use_ar_facet:
            facet_fields.append("ar")

        if settings.COLLECTION_PID == "CR":
            facet_fields.append("sites")
        elif settings.COLLECTION_PID == "PCJ":
            facet_fields.append("classname")

        params = {
            "q.op": "AND",
            "sort": sort,
            "facet.field": facet_fields,
            # Decades are built manually because we allow the user to
            # expand a decade and see individual years
            "facet.range": "year_facet",
            "f.year_facet.facet.range.start": 0,
            "f.year_facet.facet.range.end": 3000,
            "f.year_facet.facet.range.gap": 10,
            "facet.mincount": 1,
            "facet.limit": 100,
            "facet.sort": "count",
            # 'fl': '*,score',  # for debugging
            # 'debugQuery': 'true',  # for debugging
            "hl": "true",
            # 'hl.fl': "*", -> by default, returns the fields of qf
            "hl.snippets": 1,
            "hl.fragsize": 300,
            "hl.simple.pre": "<strong>",
            "hl.simple.post": "</strong>",
            "defType": "edismax",
            # Without "tie" the score equals the maximum of the per-field
            # scores; here 0.1 x the score of the other fields is added.
            "tie": 0.1,
            # "df": 'text',  Not used with dismax queries
            # We want to retrieve the highlights in both _tex and _html.
            # We need to specify the 2 in qf
            "qf": [
                "au^21",
                "title_tex^13",
                "title_html^13",
                "trans_title_tex^13",
                "trans_title_html^13",
                "abstract_tex^8",
                "trans_abstract_tex^8",
                "kwd^5",
                "trans_kwd^5",
                "collection_title_html^3",
                "collection_title_tex^3",
                "body^2",
                "bibitem",
            ],
            # The "ar" field is multi-valued and dedicated to facets; the
            # "au" field is used for searching and for displaying results.
        }

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        return search_helpers.SearchResults(
            solr_results, self.search_path, self.filters, use_ar_facet
        )

622 

623 

624##################################################################### 

625# 

626# solrInternalSearchCmd: 

627# 

628# called from ptf/views.py/book by author 

629# 

630###################################################################### 

class solrInternalSearchCmd(solrCmd):
    """Run an internal Solr search on a ready-made query ``q``.

    Returns ``SearchInternalResults`` when ``create_facets`` is true, and the
    raw pysolr results otherwise.
    """

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters; ``q`` is registered as
                required. ``None`` is replaced by a new empty dict instead of
                relying on a shared mutable default.
        """
        self.q = "*:*"
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = None  # '-score'  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""
        self.facet_fields = []
        self.facet_limit = 100
        self.fl = None
        self.create_facets = True
        # 10/03/2023 - UNUSED
        self.related_articles = False

        super().__init__({} if params is None else params)

        self.required_params.extend(["q"])

    def internal_do(self) -> search_helpers.SearchInternalResults | pysolr.Results:
        """Execute the search with the configured facets, sort and paging."""
        super().internal_do()

        # 10/03/2023 - UNUSED
        if self.site:
            # Append to self.filters, which is what gets sent as "fq" below.
            # The previous code used self.fq, which is never defined here.
            self.filters.append(f"sites:{self.site}")

        # Public facet names that map to a different Solr field.
        facet_aliases = {
            "firstLetter": "{!ex=firstletter}firstNameFacetLetter",
            "author_facet": "ar",
        }
        the_facet_fields = [facet_aliases.get(field, field) for field in self.facet_fields]
        use_year_facet = "year_facet" in self.facet_fields

        # 10/03/2023 - UNUSED
        if self.related_articles:
            params = {
                "q.op": "OR",
                "hl": "true",
                "hl.fl": "title_tex, trans_title_tex, trans_kwd, kwd",
                "hl.snippets": 1,
                "hl.fragsize": 0,
                "hl.simple.pre": "<strong>",
                "hl.simple.post": "</strong>",
                # "hl.method": "unified"
            }
        else:
            params = {
                "q.op": "AND",
                # 'fl': '*,score',  # for debugging
                # 'debugQuery': 'true',  # for debugging
                "facet.field": the_facet_fields,
                # ["{!ex=firstletter}firstNameFacetLetter", 'year_facet', 'collection_title_facet'],
                "facet.mincount": 1,
                "facet.limit": self.facet_limit,
                "facet.sort": "index",
            }

        if use_year_facet:
            # Decades are built manually because we allow the user to expand a
            # decade and see individual years
            params.update(
                {
                    "facet.range": "year_facet",
                    "f.year_facet.facet.range.start": 0,
                    "f.year_facet.facet.range.end": 3000,
                    "f.year_facet.facet.range.gap": 10,
                }
            )

        if self.sort:
            params["sort"] = self.sort

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        if self.fl:
            params["fl"] = self.fl

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        if not self.create_facets:
            return solr_results

        return search_helpers.SearchInternalResults(
            solr_results, self.search_path, self.filters, self.facet_fields
        )

732 

733 

734##################################################################### 

735# 

736# solrGetDocumentByPidCmd: 

737# 

738# 

739###################################################################### 

740 

741 

class solrGetDocumentByPidCmd(solrCmd):
    """Fetch the first Solr document whose ``pid`` field matches ``self.pid``."""

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of command parameters; ``pid`` is registered as
                required. ``None`` is replaced by a new empty dict instead of
                relying on a shared mutable default.
        """
        self.pid = None

        super().__init__({} if params is None else params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """Return the first matching document, or ``None`` when there is none."""
        super().internal_do()

        results = solrFactory.get_solr().search("pid:" + self.pid)

        if results is not None and results.docs:
            return results.docs[0]

        return None

765 

766 

class updateResourceSolrCmd(solrAddCmd):
    """Re-index a resource by overlaying ``params`` on its current Solr document.

    ``pre_do`` reads the document stored for ``self.pid``, merges the command
    parameters over it and lets ``solrAddCmd`` delete and re-add it.
    """

    def __init__(self, params=None):
        """Initialise the command.

        Args:
            params: mapping of values to overlay on the existing document.
                ``None`` is treated as an empty mapping.
        """
        self.resource = None

        # Normalise once, so that pre_do() can always unpack self.params.
        # With None, ``{**doc, **self.params}`` would raise TypeError.
        params = {} if params is None else params

        super().__init__(params)
        self.params = params

    def set_resource(self, resource):
        """Remember ``resource`` and take its ``id`` and ``pid``."""
        self.resource = resource
        self.id = resource.id
        self.pid = resource.pid

    def pre_do(self):
        """Prepare ``self.data`` from the stored document plus the overrides."""
        doc = solrGetDocumentByPidCmd({"pid": self.pid}).do()
        if doc:
            self.data = {**doc, **self.params}
            # NOTE(review): "_version_" is dropped before re-adding the
            # document -- presumably to avoid a Solr version conflict; confirm.
            self.data.pop("_version_", None)
            if "contributors" in self.data:
                # Convert the raw contributors into the au/ar/aul/fau fields.
                solr_add_contributors_to_data(self.data["contributors"], self.data)
                self.data.pop("contributors")
            # NOTE: a "kwd_groups" override is not converted; that code is
            # disabled.
        super().pre_do()

794 

795 

def research_more_like_this(article):
    """Query Solr's MoreLikeThis handler for documents similar to ``article``.

    Returns a dict that always has a ``docs`` list. When the article is found
    in Solr it also has ``params`` (the request parameters plus ``q``,
    ``mlt.fl`` and ``min_score``, sorted by key), ``numFound``,
    ``interestingTerms`` and ``explain``.

    The settings ``MLT_FIELDS``, ``MLT_BOOST`` and ``MLT_MIN_SCORE`` are
    optional. The minimum score defaults to 80 when boost is ``"true"`` and
    to 40 otherwise.
    """
    results = {"docs": []}

    doc = solrGetDocumentByPidCmd({"pid": article.pid}).do()
    if not doc:
        return results

    # fields = "au,kwd,trans_kwd,title_tex,trans_title_tex,abstract_tex,trans_abstract_tex,body"
    fields = getattr(settings, "MLT_FIELDS", "all")
    boost = getattr(settings, "MLT_BOOST", "true")
    fallback_min_score = 80 if boost == "true" else 40
    min_score = getattr(settings, "MLT_MIN_SCORE", fallback_min_score)

    # The filter is built from the part of the pid before the first "_".
    pid_prefix = article.pid.split("_")[0]
    if pid_prefix[:2] == "CR":
        # search suggested articles in all CR
        pid_filter = r"pid:/CR.*/"
    else:
        pid_filter = f"pid:/{pid_prefix}.*/"

    params = {
        "debugQuery": "true",
        "mlt.interestingTerms": "details",
        "mlt.boost": boost,
        "fl": "*,score",
        "mlt.minwl": 4,
        "mlt.maxwl": 100,
        "mlt.mintf": 2,
        "mlt.mindf": 2,
        "mlt.maxdfpct": 1,
        "mlt.maxqt": 50,
        # "mlt.qf": "trans_kwd^90 title_tex^80 body^1.7",
        "fq": pid_filter,
    }

    doc_query = f'id:{doc["id"]}'
    similar = solrFactory.get_solr().more_like_this(q=doc_query, mltfl=fields, **params)

    # These keys are added after the request; they are only reported back.
    params["q"] = doc_query
    params["mlt.fl"] = fields
    params["min_score"] = min_score

    raw = similar.raw_response
    results["params"] = dict(sorted(params.items()))
    results["docs"] = similar.docs
    results["numFound"] = raw["response"]["numFound"]
    results["interestingTerms"] = raw["interestingTerms"]
    results["explain"] = similar.debug["explain"]
    return results

829 

830 

def is_excluded_suggested_article(title):
    """Tell whether ``title`` must be left out of the suggested articles.

    A title is excluded when it starts with one of the prefixes in
    ``settings.MLT_EXCLUDED_TITLES_START`` or is listed in
    ``settings.MLT_EXCLUDED_TITLES``. Both settings are optional and default
    to an empty list.
    """
    excluded_prefixes = getattr(settings, "MLT_EXCLUDED_TITLES_START", [])
    if title.startswith(tuple(excluded_prefixes)):
        return True

    return title in getattr(settings, "MLT_EXCLUDED_TITLES", [])

839 

840 

def auto_suggest_doi(suggest, article, results=None):
    """Fill ``suggest.doi_list`` from the articles most similar to ``article``.

    Args:
        suggest: object with an ``automatic_list`` flag. Its ``doi_list``
            attribute is overwritten only when the flag is truthy.
        article: article passed to ``research_more_like_this`` when
            ``results`` is empty.
        results: optional, already computed ``research_more_like_this`` output.

    Returns:
        The results used, whether they were passed in or computed here.

    Only the first three documents are considered. A document is kept when
    its score is above ``results["params"]["min_score"]``, it has a DOI not
    yet listed, and its title is not excluded. The kept DOIs are joined with
    newlines.
    """
    if not results:
        results = research_more_like_this(article)

    if results and suggest.automatic_list:
        doi_list = []
        for item in results["docs"][:3]:
            if item["score"] > results["params"]["min_score"]:
                doi = item.get("doi", "")
                if not doi or doi in doi_list:
                    # A document without a DOI cannot be suggested; skipping
                    # it keeps empty lines out of the list.
                    continue
                if not is_excluded_suggested_article(item.get("title_tex", "")):
                    doi_list.append(doi)
        suggest.doi_list = "\n".join(doi_list)

    return results