Coverage for apps/ptf/cmds/solr

1import pysolr

3from django.conf import settings

5from ptf.cmds.base_cmds import baseCmd

6from ptf.cmds.base_cmds import make_int

7from ptf.display import resolver

8from ptf.site_register import SITE_REGISTER

9from ptf.solr import search_helpers

10from ptf.utils import get_display_name

12# Not used so far.

13# nlm2solr use normalize-space for volume and volume-series,

14# but make_int is called to convert into int: spaces are also trimmed

15# def normalize_whitespace(str):

16# import re

17# str = str.strip()

18# str = re.sub(r'\s+', ' ', str)

19# return str

class solrFactory:
    """Holder for a single, lazily created ``pysolr.Solr`` client.

    The client and its URL live in class attributes, so all callers share
    the same client until :meth:`reset` is called.
    """

    solr = None
    solr_url = None

    @staticmethod
    def get_solr():
        """Return the shared Solr client, building it on first use.

        The URL is read from ``settings.SOLR_URL`` unless ``solr_url`` has
        already been assigned.
        """
        if solrFactory.solr is not None:
            return solrFactory.solr

        if solrFactory.solr_url is None:
            solrFactory.solr_url = settings.SOLR_URL
        solrFactory.solr = pysolr.Solr(solrFactory.solr_url, timeout=10)
        return solrFactory.solr

    @staticmethod
    def do_solr_commit():
        """Commit on the shared client unless ``settings.IGNORE_SOLR`` is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        solrFactory.get_solr().commit()

    @staticmethod
    def do_solr_rollback():
        """Send a rollback message unless ``settings.IGNORE_SOLR`` is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        # The rollback goes through pysolr's private ``_update`` helper.
        rollback_msg = "<rollback />"
        solrFactory.get_solr()._update(rollback_msg)

    @staticmethod
    def reset():
        """Close the current client's session (if any) and forget the client."""
        client = solrFactory.solr
        if client:
            client.get_session().close()
        solrFactory.solr = None

def solr_add_contributors_to_data(contributors, data):
    """Fill the author-related keys of ``data`` from ``contributors``.

    Only contributors whose role is author, editor or translator are used.
    Nothing is written when ``contributors`` is None. Otherwise the function
    sets:

    - ``au``: display names joined with "; "
    - ``ar``: reference names (the contributor's ``mid`` when set, else the
      display name)
    - ``aul``: last names
    - ``fau``: first display name, only when there is at least one
    """
    if contributors is None:
        return

    display_names = []
    ref_names = []
    last_names = []

    for contrib in contributors:
        if contrib["role"] not in ("author", "editor", "translator"):
            continue

        display_name = get_display_name(
            "", contrib["first_name"], contrib["last_name"], "", contrib["string_name"]
        )
        ref_name = contrib["mid"] or display_name

        if ref_name:
            ref_names.append(ref_name)
        if display_name:
            display_names.append(display_name)
        if contrib["last_name"]:
            last_names.append(contrib["last_name"])

    data["au"] = "; ".join(display_names)
    # names used for the author facet / references
    data["ar"] = ref_names
    # surnames
    data["aul"] = last_names

    if display_names:
        data["fau"] = display_names[0]

def solr_add_kwds_to_data(kwds, data):
    """Split ``kwds`` into the ``kwd``, ``trans_kwd`` and ``msc`` keys of ``data``.

    Keywords of type "msc" are upper-cased and stored as a list under
    ``msc``. Other keywords are joined with ", ": French ones under ``kwd``,
    all others under ``trans_kwd``.
    """
    french_values = []
    translated_values = []
    msc_codes = []

    for entry in kwds:
        if entry["type"] == "msc":
            msc_codes.append(entry["value"].upper())
        elif entry["lang"] == "fr":
            french_values.append(entry["value"])
        else:
            translated_values.append(entry["value"])

    data["kwd"] = ", ".join(french_values)
    data["trans_kwd"] = ", ".join(translated_values)
    data["msc"] = msc_codes

98#####################################################################

99#

100# solrCmd: base class for Solr commands

101#

102######################################################################

class solrCmd(baseCmd):
    """Base class for Solr commands.

    ``do`` and ``undo`` become no-ops returning None when
    ``settings.IGNORE_SOLR`` is set to a truthy value.
    """

    def __init__(self, params={}):
        super().__init__(params)

    def do(self, parent=None):
        """Run the command, or return None when Solr is ignored."""
        solr_ignored = getattr(settings, "IGNORE_SOLR", False)
        if solr_ignored:
            return None

        return super().do(parent)

    def post_do(self, resource=None):
        """Delegate to the parent class."""
        super().post_do(resource)

    def undo(self):
        """Undo the command, or return None when Solr is ignored."""
        solr_ignored = getattr(settings, "IGNORE_SOLR", False)
        if solr_ignored:
            return None

        return super().undo()

121

122

123#####################################################################

124#

125# solrDeleteCmd: generic to delete Solr documents, based on a query

126#

127######################################################################

class solrDeleteCmd(solrCmd):
    """Delete the Solr documents matching the query ``q``.

    ``q`` is a required parameter; ``commit`` (default True) is forwarded
    to the Solr delete call.
    """

    def __init__(self, params={}):
        # Defaults are assigned before the parent constructor runs.
        self.commit = True
        self.q = None

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self):
        """Issue the delete-by-query; always returns None."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.delete(q=self.q, commit=self.commit)

        return None

143

144

145#####################################################################

146#

147# solrAddCmd: base class for Solr Add commands

148#

149######################################################################

class solrAddCmd(solrCmd):
    """Base class for commands that add one document to Solr.

    ``id`` and ``pid`` are required to add; ``id`` is required to delete.
    The document is built in ``self.data``.
    """

    def __init__(self, params={}):
        # Defaults are assigned before the parent constructor runs.
        self.commit = True
        self.db_obj = None
        self.id = None
        self.pid = None
        self.data = {}

        super().__init__(params)

        self.required_params.extend(["id", "pid"])
        self.required_delete_params.extend(["id"])

    def pre_do(self):
        """Store id/pid in the document and remove existing documents for the pid."""
        super().pre_do()

        self.data["id"] = self.id
        self.data["pid"] = self.pid

        # Errors and/or simultaneous uploads sometimes leave several records
        # for one PID. To avoid several search results for a single PID,
        # everything is deleted before internal_do runs.
        solrDeleteCmd({"q": "pid:" + self.pid}).do()

    def internal_do(self):
        """Send ``self.data`` to Solr as a single document; returns None."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.add(docs=[self.data], commit=self.commit)

        return None

    def internal_undo(self):
        """Delete the document by id and return the parent's undo result."""
        undo_result = super().internal_undo()

        client = solrFactory.get_solr()
        client.delete(id=self.id, commit=self.commit)

        return undo_result

188

189

190#####################################################################

191#

192# addResourceSolrCmd: base class for solrAddCmds adding a Resource

193#

194######################################################################

class addResourceSolrCmd(solrAddCmd):
    """Base class for the add-commands that index a Resource.

    ``xobj`` (the model_data object) is required. Its attributes listed in
    ``self.fields`` are copied into the Solr document.
    """

    def __init__(self, params={}):
        self.xobj = None  # model_data object

        # attributes of xobj that are copied as-is into the Solr document
        self.fields = [
            "lang",
            "doi",
            "title_tex",
            "title_html",
            "trans_title_tex",
            "trans_title_html",
            "abstract_tex",
            "abstract_html",
            "trans_abstract_tex",
            "trans_abstract_html",
            "collection_title_tex",
            "collection_title_html",
            "collection_id",
            "year",
            "body",
            "bibitem",
        ]

        # Used to filter the articles based on their site
        self.sites = None

        super().__init__(params)

        self.required_params.extend(["xobj"])

    def add_collection(self, collection):
        """Record the collection id/titles and derive ``dt`` (and ``classname``)."""
        self.data["collection_id"] = collection.id

        self.data.setdefault("collection_title_tex", []).append(collection.title_tex)
        self.data.setdefault("collection_title_html", []).append(collection.title_html)

        # classname is used only by PCJ for the article types;
        # journals and acta only get a document type.
        dt_only = {
            "journal": "Article de revue",
            "acta": "Acte de séminaire",
        }
        dt_and_classname = {
            "thesis": "Thèse",
            "lecture-notes": "Notes de cours",
            "proceeding": "Acte de rencontre",
        }

        coltype = collection.coltype
        if coltype in dt_only:
            self.data["dt"] = [dt_only[coltype]]
        else:
            label = dt_and_classname.get(coltype, "Livre")
            self.data["classname"] = label
            self.data["dt"] = [label]

    def add_abstracts_to_data(self):
        """Copy each abstract's tex/html value; non-French ones get a ``trans_`` prefix."""
        for abstract in self.xobj.abstracts:
            prefix = "" if abstract["lang"] == "fr" else "trans_"

            for flavour in ("tex", "html"):
                self.data[f"{prefix}abstract_{flavour}"] = abstract[f"value_{flavour}"]

    def add_year_to_data(self, year):
        """Set ``year_facet``; for a "first-last" range the second part is used."""
        if not year:
            return

        parts = str(year).split("-")
        self.data["year_facet"] = int(parts[1]) if len(parts) > 1 else int(year)

    def pre_do(self):
        """Build the Solr document from ``xobj`` (and ``db_obj`` when present).

        Raises:
            ValueError: if no ``dt`` has been set on the document.
        """
        super().pre_do()

        for name in self.fields:
            if hasattr(self.xobj, name):
                self.data[name] = getattr(self.xobj, name)

        self.add_abstracts_to_data()
        solr_add_kwds_to_data(self.xobj.kwds, self.data)
        solr_add_contributors_to_data(self.xobj.contributors, self.data)

        if "dt" not in self.data:
            raise ValueError(f"add SolR resource without dt - {self.xobj.pid}")

        # year either comes directly from xobj (container) or from set_container
        self.add_year_to_data(self.data["year"])

        if self.db_obj is not None:
            field_by_mimetype = {
                "application/pdf": "pdf",
                "image/x.djvu": "djvu",
                "application/x-tex": "tex",
            }
            for stream in self.xobj.streams:
                mimetype = stream["mimetype"]
                target = field_by_mimetype.get(mimetype)
                if target is not None:
                    self.data[target] = self.db_obj.get_binary_file_href_full_path(
                        "self", mimetype, stream["location"]
                    )

            self.data["wall"] = self.db_obj.get_wall()

        self.data["sites"] = self.sites if self.sites else [settings.SITE_ID]

315

316

317#####################################################################

318#

319# addContainerSolrCmd: adds/remove a container (issue/book)

320#

321# A container needs a collection (collection_title_tex etc.)

322#

323######################################################################

class addContainerSolrCmd(addResourceSolrCmd):
    """Index a container (issue or book) in Solr.

    A container needs a collection (``collection_title_tex`` etc.).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(["ctype"])

    def pre_do(self):
        """Add the container numbering (volume / number / vseries) to the document.

        When ``xobj.incollection`` is non-empty, its first entry overrides
        the numbering: ``vseries`` and ``number`` come from that entry and
        ``volume`` is forced to 0.
        """
        super().pre_do()

        # Each attribute goes under its own key. Writing every value to
        # "volume" would let the last attribute found overwrite the real
        # volume and leave "number" and "vseries" unset.
        for field in ["volume", "number", "vseries"]:
            if hasattr(self.xobj, field):
                self.data[field] = make_int(getattr(self.xobj, field))

        if hasattr(self.xobj, "incollection") and len(self.xobj.incollection) > 0:
            incol = self.xobj.incollection[0]
            self.data["vseries"] = make_int(incol.vseries)
            self.data["volume"] = 0
            self.data["number"] = make_int(incol.volume)

343

344 # if incol.coltype == "theses":

345 # self.data["dt"] = ["Thèse"]

346

347

348#####################################################################

349#

350# addArticleSolrCmd: adds/remove an article

351#

352# an article needs a container (container_id) that needs a collection (collection_id)

353#

354######################################################################

class addArticleSolrCmd(addResourceSolrCmd):
    """Index an article in Solr.

    An article needs a container (``container_id``), which itself needs a
    collection (``collection_id``).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_id", "volume", "number", "vseries", "article_number"]
        )

    def set_container(self, container):
        """Copy the container id, year and numbering into the document."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["vseries"] = make_int(container.vseries)
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)

    def set_eprint(self, eprint):
        """Tag the document as an e-print (``dt`` must already exist)."""
        self.data["dt"].append("e-print")

    def set_source(self, source):
        """No-op."""
        pass

    def set_thesis(self, thesis):
        """Tag the document as a thesis (``dt`` must already exist)."""
        self.data["dt"].append("thesis")

    def set_original_article(self, article):
        """No-op for now."""
        # TODO Replace some data (ie doi, pid) with the original article
        pass

    def pre_do(self):
        """Set ``classname`` and a display-ready ``page_range`` ("p. ...")."""
        super().pre_do()

        self.data["classname"] = resolver.ARTICLE_TYPES.get(
            self.xobj.atype, "Article de recherche"
        )

        page_range = self.xobj.page_range
        if not page_range:
            # Build the range from the first/last pages.
            text = "p. "
            if self.xobj.fpage is not None:
                text += self.xobj.fpage
            if self.xobj.fpage and self.xobj.lpage:
                text += "-"
            if self.xobj.lpage is not None:
                text += self.xobj.lpage
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # Already prefixed: keep it as is rather than blanking the field.
            text = page_range

        self.data["page_range"] = text

402

403

404#####################################################################

405#

406# addBookPartSolrCmd: adds/remove an book part (similar to an article)

407#

408# a book part needs a collection id (array)

409#

410######################################################################

class addBookPartSolrCmd(addResourceSolrCmd):
    """Index a book part (similar to an article) in Solr.

    A book part needs a collection id (array).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_title_tex", "container_title_html", "volume", "number"]
        )

    def set_container(self, container):
        """Copy the container id, year, numbering and titles into the document."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)
        self.data["container_title_tex"] = container.title_tex
        self.data["container_title_html"] = container.title_html

    def pre_do(self):
        """Set ``classname`` and a display-ready ``page_range`` ("p. ...")."""
        super().pre_do()

        self.data["classname"] = "Chapitre de livre"

        page_range = self.xobj.page_range
        if not page_range:
            # Build the range from the first/last pages.
            text = "p. "
            if self.xobj.fpage is not None:
                text += self.xobj.fpage
            if self.xobj.fpage and self.xobj.lpage:
                text += "-"
            if self.xobj.lpage is not None:
                text += self.xobj.lpage
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # Already prefixed: keep it as is rather than blanking the field.
            text = page_range

        self.data["page_range"] = text

444

445

446#####################################################################

447#

448# solrSearchCmd:

449#

450# called from ptf/views.py; SolrRequest(request, q, alias=alias,

451# site=site,

452# default={'sort': '-score'})

453#

454# Warning: As of July 2018, only 1 site id is stored in a SolR document

455# Although the SolR schema is already OK to store multiple sites ("sites" is an array)

456# no Solr commands have been written to add/remove sites

457# We only have add commands.

458# Search only works if the Solr instance is meant for individual or ALL sites

459#

460######################################################################

class solrSearchCmd(solrCmd):
    """Run the faceted, highlighted search built from the ``qs`` criteria.

    ``qs`` is required: a list of dicts with the keys ``name``, ``value``,
    ``not``, ``first`` and ``last``. ``sort`` is a comma-separated list of
    field names, where a leading "-" means descending.
    """

    def __init__(self, params={}):
        self.q = ""
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = "-score"  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""

        super().__init__(params)

        self.required_params.extend(["qs"])

    def get_q(self, name, value, exclude, first, last):
        """Translate one search criterion into a Solr query fragment.

        Args:
            name: criterion name ("all", "date", "author", "author_ref",
                "title", "body", "references", "kwd"); any other name
                yields an unprefixed clause.
            value: search text; a value wrapped in double quotes is kept
                verbatim, otherwise its words are AND-ed together.
            exclude: when truthy the fragment is negated with "-".
            first, last: bounds of the year range, used only for "date".
        """
        if name == "all" and value == "*":
            return "*:*"

        if value == "*":
            value = ""

        q = "-" if exclude else ""

        if name == "date":
            return q + "year:[" + first + " TO " + last + "]"

        field_prefixes = {
            "author": "au:",
            "author_ref": "ar:",
            "title": "title_tex:",
            "body": "body:",
            "references": "bibitem:",
            "kwd": "kwd:",
        }
        q += field_prefixes.get(name, "")

        if len(value) > 0 and value[0] == '"' and value[-1] == '"':
            q += value
        else:
            q += "(" + " AND ".join(value.split()) + ")"

        return q

    def internal_do(self) -> search_helpers.SearchResults:
        """Build the Solr parameters, run the search and wrap the results."""
        super().internal_do()

        if settings.COLLECTION_PID == "CR":
            cr_ids = ["CRMATH", "CRMECA", "CRPHYS", "CRCHIM", "CRGEOS", "CRBIOL"]
            ids = [SITE_REGISTER[item.lower()]["site_id"] for item in cr_ids]
            self.filters.append(f"sites:[{min(ids)} TO {max(ids)}]")
        elif settings.COLLECTION_PID != "ALL":
            self.filters.append(f"sites:{settings.SITE_ID}")

        sort = "score desc"
        if self.sort:
            sorts = []
            for spec in self.sort.split(","):
                spec = spec.strip()
                if not spec:
                    # Skip empty entries (e.g. a trailing comma) instead of
                    # failing on spec[0].
                    continue
                if spec.startswith("-"):
                    sorts.append(f"{spec[1:]} desc")
                else:
                    sorts.append(f"{spec} asc")
            sorts.append("year desc")
            sort = ", ".join(sorts)

        use_ar_facet = True
        q = ""
        if self.qs:
            for qi in self.qs:
                if qi["name"] == "author_ref":
                    use_ar_facet = False
                if qi["value"] or qi["first"]:
                    new_q = self.get_q(qi["name"], qi["value"], qi["not"], qi["first"], qi["last"])
                    q += new_q + " "
        if q:
            self.q = q

        facet_fields = ["collection_title_facet", "msc_facet", "dt", "year_facet"]

        if use_ar_facet:
            facet_fields.append("ar")

        if settings.COLLECTION_PID == "CR":
            facet_fields.append("sites")
        elif settings.COLLECTION_PID == "PCJ":
            facet_fields.append("classname")

        params = {
            "q.op": "AND",
            "sort": sort,
            "facet.field": facet_fields,
            # Decades are built manually because we allow the user to
            # expand a decade and see individual years
            "facet.range": "year_facet",
            "f.year_facet.facet.range.start": 0,
            "f.year_facet.facet.range.end": 3000,
            "f.year_facet.facet.range.gap": 10,
            "facet.mincount": 1,
            "facet.limit": 100,
            "facet.sort": "count",
            # 'fl': '*,score',  # for debugging
            # 'debugQuery': 'true',  # for debugging
            "hl": "true",
            # 'hl.fl': "*" -> by default, the qf fields are returned
            "hl.snippets": 1,
            "hl.fragsize": 300,
            "hl.simple.pre": "<strong>",
            "hl.simple.post": "</strong>",
            "defType": "edismax",
            # Without "tie" the score is the maximum of the per-field
            # scores; here 0.1 x the score of the other fields is added.
            "tie": 0.1,
            # "df": 'text' is not used with dismax queries.
            # We want to retrieve the highlights in both _tex and _html,
            # so both have to be listed in qf.
            "qf": [
                "au^21",
                "title_tex^13",
                "title_html^13",
                "trans_title_tex^13",
                "trans_title_html^13",
                "abstract_tex^8",
                "trans_abstract_tex^8",
                "kwd^5",
                "trans_kwd^5",
                "collection_title_html^3",
                "collection_title_tex^3",
                "body^2",
                "bibitem",
            ],
            # Field "ar" is multi-valued and dedicated to facets; field "au"
            # is used for searching and for displaying the results.
        }

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        search_results = search_helpers.SearchResults(
            solr_results, self.search_path, self.filters, use_ar_facet
        )

        return search_results

622

623

624#####################################################################

625#

626# solrInternalSearchCmd:

627#

628# called from ptf/views.py/book by author

629#

630######################################################################

class solrInternalSearchCmd(solrCmd):
    """Run an internal Solr search with configurable facets.

    ``q`` is required (default "*:*"). With ``create_facets`` (default
    True) the raw results are wrapped in
    ``search_helpers.SearchInternalResults``; otherwise the pysolr results
    are returned unchanged.
    """

    def __init__(self, params={}):
        self.q = "*:*"
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = None  # '-score'  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""
        self.facet_fields = []
        self.facet_limit = 100
        self.fl = None
        self.create_facets = True
        # 10/03/2023 - UNUSED
        self.related_articles = False

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self) -> search_helpers.SearchInternalResults | pysolr.Results:
        """Build the Solr parameters, run the search and return the results."""
        super().internal_do()

        # 10/03/2023 - UNUSED
        if self.site:
            # The filter list of this command is ``self.filters``; no ``fq``
            # attribute is defined on it.
            self.filters.append(f"sites:{self.site}")

        the_facet_fields = []
        use_year_facet = False
        for field in self.facet_fields:
            if field == "firstLetter":
                the_facet_fields.append("{!ex=firstletter}firstNameFacetLetter")
            elif field == "author_facet":
                the_facet_fields.append("ar")
            else:
                the_facet_fields.append(field)

            if field == "year_facet":
                use_year_facet = True

        # 10/03/2023 - UNUSED
        if self.related_articles:
            params = {
                "q.op": "OR",
                "hl": "true",
                "hl.fl": "title_tex, trans_title_tex, trans_kwd, kwd",
                "hl.snippets": 1,
                "hl.fragsize": 0,
                "hl.simple.pre": "<strong>",
                "hl.simple.post": "</strong>",
                # "hl.method": "unified"
            }
        else:
            params = {
                "q.op": "AND",
                # 'fl': '*,score',  # for debugging
                # 'debugQuery': 'true',  # for debugging
                "facet.field": the_facet_fields,
                # ["{!ex=firstletter}firstNameFacetLetter", 'year_facet', 'collection_title_facet'],
                "facet.mincount": 1,
                "facet.limit": self.facet_limit,
                "facet.sort": "index",
            }

        # NOTE(review): the nesting of this block could not be recovered from
        # the extracted listing; it is placed at method level, which only
        # differs from nesting under the ``else`` above when
        # ``related_articles`` is set - confirm.
        if use_year_facet:
            # Decades are built manually because we allow the user to expand a
            # decade and see individual years
            params.update(
                {
                    "facet.range": "year_facet",
                    "f.year_facet.facet.range.start": 0,
                    "f.year_facet.facet.range.end": 3000,
                    "f.year_facet.facet.range.gap": 10,
                }
            )

        if self.sort:
            params["sort"] = self.sort

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        if self.fl:
            params["fl"] = self.fl

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)
        results = solr_results

        if self.create_facets:
            results = search_helpers.SearchInternalResults(
                solr_results, self.search_path, self.filters, self.facet_fields
            )

        return results

732

733

734#####################################################################

735#

736# solrGetDocumentByPidCmd:

737#

738#

739######################################################################

740

741

class solrGetDocumentByPidCmd(solrCmd):
    """Fetch the first Solr document whose ``pid`` matches the required ``pid``."""

    def __init__(self, params={}):
        self.pid = None

        super().__init__(params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """Return the first matching document, or None when nothing matches."""
        super().internal_do()

        query = "pid:" + self.pid
        found = solrFactory.get_solr().search(query)

        if found is None:
            return None

        documents = found.docs
        return documents[0] if documents else None

765

766

class updateResourceSolrCmd(solrAddCmd):
    """Re-index an existing Solr document with some values overridden.

    The current document is fetched by pid, merged with the values given in
    ``params`` (which win over the stored ones) and added again.
    """

    def __init__(self, params=None):
        self.resource = None

        super().__init__(params)
        self.params = params

    def set_resource(self, resource):
        """Remember the resource and take its ``id`` and ``pid``."""
        self.resource = resource
        self.id = resource.id
        self.pid = resource.pid

    def pre_do(self):
        """Build ``self.data`` from the stored document plus the overrides."""
        doc = solrGetDocumentByPidCmd({"pid": self.pid}).do()
        if doc:
            # ``params`` defaults to None, which cannot be unpacked with **.
            overrides = self.params or {}
            self.data = {**doc, **overrides}
            if "_version_" in self.data:
                del self.data["_version_"]
            if "contributors" in self.data:
                # "contributors" is expanded into the author fields and then
                # removed from the document.
                solr_add_contributors_to_data(self.data["contributors"], self.data)
                self.data.pop("contributors")
        # NOTE(review): the parent pre_do is called whether or not a document
        # was found; the nesting was not recoverable from the extracted
        # listing - confirm.
        super().pre_do()

794

795

def research_more_like_this(article):
    """Query Solr's "more like this" handler for documents similar to ``article``.

    Returns a dict that always has a ``docs`` list. When the article is
    found in Solr it also has ``params`` (the query parameters, sorted by
    name, including ``min_score``), ``numFound``, ``interestingTerms`` and
    ``explain``.

    ``settings.MLT_FIELDS``, ``MLT_BOOST`` and ``MLT_MIN_SCORE`` override
    the defaults when they are defined.
    """
    results = {"docs": []}

    doc = solrGetDocumentByPidCmd({"pid": article.pid}).do()
    if not doc:
        return results

    # fields = "au,kwd,trans_kwd,title_tex,trans_title_tex,abstract_tex,trans_abstract_tex,body"
    fields = getattr(settings, "MLT_FIELDS", "all")
    boost = getattr(settings, "MLT_BOOST", "true")
    min_score = getattr(settings, "MLT_MIN_SCORE", 80 if boost == "true" else 40)

    params = {
        "debugQuery": "true",
        "mlt.interestingTerms": "details",
        "mlt.boost": boost,
        "fl": "*,score",
        "mlt.minwl": 4,
        "mlt.maxwl": 100,
        "mlt.mintf": 2,
        "mlt.mindf": 2,
        "mlt.maxdfpct": 1,
        "mlt.maxqt": 50,
    }
    # params.update({"mlt.qf": "trans_kwd^90 title_tex^80 body^1.7"})

    pid_prefix = article.pid.split("_")[0]
    if pid_prefix.startswith("CR"):
        # search suggested articles in all CR
        params["fq"] = r"pid:/CR.*/"
    else:
        params["fq"] = f"pid:/{pid_prefix}.*/"

    query = f'id:{doc["id"]}'
    similar = solrFactory.get_solr().more_like_this(q=query, mltfl=fields, **params)

    # The entries below are only reported back to the caller; they are added
    # after the query has been sent.
    params["q"] = query
    params["mlt.fl"] = fields
    params["min_score"] = min_score

    results["params"] = dict(sorted(params.items()))
    results["docs"] = similar.docs
    results["numFound"] = similar.raw_response["response"]["numFound"]
    results["interestingTerms"] = similar.raw_response["interestingTerms"]
    results["explain"] = similar.debug["explain"]
    return results

829

830

def is_excluded_suggested_article(title):
    """Tell whether ``title`` must be left out of the suggested articles.

    A title is excluded when it starts with one of
    ``settings.MLT_EXCLUDED_TITLES_START`` or is listed in
    ``settings.MLT_EXCLUDED_TITLES``; a missing setting counts as empty.
    """
    excluded_titles = getattr(settings, "MLT_EXCLUDED_TITLES", [])
    excluded_prefixes = getattr(settings, "MLT_EXCLUDED_TITLES_START", [])

    if title.startswith(tuple(excluded_prefixes)):
        return True
    return title in excluded_titles

839

840

def auto_suggest_doi(suggest, article, results=None):
    """Fill ``suggest.doi_list`` from the "more like this" results.

    Args:
        suggest: object with a truthy/falsy ``automatic_list`` attribute;
            ``doi_list`` is only rewritten when it is truthy.
        article: article to find similar documents for; only used when
            ``results`` is not given.
        results: a previous ``research_more_like_this`` result to reuse.

    Returns:
        The results that were used (given or freshly computed).

    Among the first three documents, only those scoring above
    ``results["params"]["min_score"]`` are kept. Documents without a DOI,
    duplicates and excluded titles are skipped. DOIs are joined with
    newlines.
    """
    if not results:
        results = research_more_like_this(article)

    if results and suggest.automatic_list:
        doi_list = []
        for item in results["docs"][:3]:
            if item["score"] > results["params"]["min_score"]:
                doi = item.get("doi", "")
                title = item.get("title_tex", "")
                # An empty DOI would only add a blank line to the list.
                if doi and doi not in doi_list and not is_excluded_suggested_article(title):
                    doi_list.append(doi)
        suggest.doi_list = "\n".join(doi_list)
    return results

Coverage for apps/ptf/cmds/solr_cmds.py: 84%

463 statements