Coverage for apps/ptf/cmds/solr_cmds.py: 84%
463 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
1import pysolr
3from django.conf import settings
5from ptf.cmds.base_cmds import baseCmd
6from ptf.cmds.base_cmds import make_int
7from ptf.display import resolver
8from ptf.site_register import SITE_REGISTER
9from ptf.solr import search_helpers
10from ptf.utils import get_display_name
12# Not used so far.
13# nlm2solr use normalize-space for volume and volume-series,
14# but make_int is called to convert into int: spaces are also trimmed
15# def normalize_whitespace(str):
16# import re
17# str = str.strip()
18# str = re.sub(r'\s+', ' ', str)
19# return str
class solrFactory:
    """Holder of a single, lazily created ``pysolr.Solr`` client.

    The client and its URL are stored as class attributes, so every caller
    that goes through ``get_solr()`` receives the same object.
    """

    # Shared client; created by get_solr() on first use.
    solr = None
    # URL of the Solr core; taken from settings.SOLR_URL when left unset.
    solr_url = None

    @staticmethod
    def get_solr():
        """Return the shared client, creating it (10 s timeout) if needed."""
        client = solrFactory.solr
        if client is not None:
            return client

        if solrFactory.solr_url is None:
            solrFactory.solr_url = settings.SOLR_URL

        client = pysolr.Solr(solrFactory.solr_url, timeout=10)
        solrFactory.solr = client
        return client

    @staticmethod
    def do_solr_commit():
        """Commit pending changes, unless settings.IGNORE_SOLR is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        solrFactory.get_solr().commit()

    @staticmethod
    def do_solr_rollback():
        """Send a rollback message, unless settings.IGNORE_SOLR is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        # The raw XML message goes through pysolr's private _update().
        solrFactory.get_solr()._update("<rollback />")

    @staticmethod
    def reset():
        """Close the session of the current client (if any) and forget it."""
        current = solrFactory.solr
        if current:
            current.get_session().close()
        solrFactory.solr = None
def solr_add_contributors_to_data(contributors, data):
    """Fill the author-related Solr fields of ``data`` from ``contributors``.

    Only contributors whose role is author, editor or translator are used.
    Writes ``au`` (display names joined with "; "), ``ar`` (reference names),
    ``aul`` (last names) and, when at least one display name exists, ``fau``
    (the first display name). ``data`` is left untouched when
    ``contributors`` is None.
    """
    if contributors is None:
        return

    indexed_roles = ["author", "editor", "translator"]
    display_names = []
    reference_names = []
    surnames = []

    for person in contributors:
        if person["role"] not in indexed_roles:
            continue

        shown = get_display_name(
            "", person["first_name"], person["last_name"], "", person["string_name"]
        )
        # The "mid" value, when set, is preferred over the display name.
        referenced = person["mid"] or shown

        if referenced:
            reference_names.append(referenced)
        if shown:
            display_names.append(shown)
        if person["last_name"]:
            surnames.append(person["last_name"])

    data["au"] = "; ".join(display_names)
    # Names used for author references
    data["ar"] = reference_names
    # Surnames / last names
    data["aul"] = surnames

    if display_names:
        data["fau"] = display_names[0]
def solr_add_kwds_to_data(kwds, data):
    """Split keywords into the ``kwd``, ``trans_kwd`` and ``msc`` fields.

    Keywords of type "msc" go, upper-cased, into the ``msc`` list. The other
    keywords are joined with ", " into ``kwd`` when their lang is "fr" and
    into ``trans_kwd`` otherwise.
    """
    french = []
    translated = []
    msc_codes = []

    for entry in kwds:
        if entry["type"] == "msc":
            msc_codes.append(entry["value"].upper())
        elif entry["lang"] == "fr":
            french.append(entry["value"])
        else:
            translated.append(entry["value"])

    data["kwd"] = ", ".join(french)
    data["trans_kwd"] = ", ".join(translated)
    data["msc"] = msc_codes
98#####################################################################
99#
100# solrCmd: base class for Solr commands
101#
102######################################################################
class solrCmd(baseCmd):
    """Base class for Solr commands.

    ``do`` and ``undo`` return None without doing anything when
    settings.IGNORE_SOLR is truthy; otherwise they defer to ``baseCmd``.
    """

    def __init__(self, params={}):
        super().__init__(params)

    def do(self, parent=None):
        """Run the command, or return None when Solr is ignored."""
        return None if getattr(settings, "IGNORE_SOLR", False) else super().do(parent)

    def post_do(self, resource=None):
        """Defer to the base class implementation."""
        super().post_do(resource)

    def undo(self):
        """Undo the command, or return None when Solr is ignored."""
        return None if getattr(settings, "IGNORE_SOLR", False) else super().undo()
123#####################################################################
124#
125# solrDeleteCmd: generic to delete Solr documents, based on a query
126#
127######################################################################
class solrDeleteCmd(solrCmd):
    """Delete every Solr document matching the query ``q`` (required)."""

    def __init__(self, params={}):
        # Defaults are set before the base initializer runs.
        self.q = None
        self.commit = True

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self):
        """Issue the delete-by-query; the command has no result."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.delete(q=self.q, commit=self.commit)

        return None
145#####################################################################
146#
147# solrAddCmd: base class for Solr Add commands
148#
149######################################################################
class solrAddCmd(solrCmd):
    """Base class for commands that add one document to Solr.

    ``id`` and ``pid`` are required to add; ``id`` is required to delete.
    The document sent to Solr is the ``data`` dict.
    """

    def __init__(self, params={}):
        self.data = {}
        self.id = None
        self.pid = None
        self.db_obj = None
        self.commit = True

        super().__init__(params)

        self.required_params.extend(["id", "pid"])
        self.required_delete_params.extend(["id"])

    def pre_do(self):
        """Store id/pid in the document and remove existing documents of the pid."""
        super().pre_do()

        self.data.update({"id": self.id, "pid": self.pid})
        # After errors and/or simultaneous uploads, several records may exist
        # for one PID. To avoid several search results for a single PID,
        # everything is deleted before internal_do runs.
        solrDeleteCmd({"q": "pid:" + self.pid}).do()

    def internal_do(self):
        """Send the document to Solr; the command has no result."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.add(docs=[self.data], commit=self.commit)

        return None

    def internal_undo(self):
        """Delete the document by id and return the base class result."""
        undone = super().internal_undo()

        client = solrFactory.get_solr()
        client.delete(id=self.id, commit=self.commit)

        return undone
190#####################################################################
191#
192# addResourceSolrCmd: base class for solrAddCmds adding a Resource
193#
194######################################################################
class addResourceSolrCmd(solrAddCmd):
    """Base class for the commands that add a resource to Solr.

    ``xobj`` (required) is the object whose attributes, abstracts, keywords,
    contributors and streams are copied into the Solr document.
    """

    def __init__(self, params={}):
        self.xobj = None  # model_data object

        # Attributes of xobj that are passed to Solr when present
        self.fields = [
            "lang",
            "doi",
            "title_tex",
            "title_html",
            "trans_title_tex",
            "trans_title_html",
            "abstract_tex",
            "abstract_html",
            "trans_abstract_tex",
            "trans_abstract_html",
            "collection_title_tex",
            "collection_title_html",
            "collection_id",
            "year",
            "body",
            "bibitem",
        ]

        # Used to filter the articles based on their site
        self.sites = None

        super().__init__(params)

        self.required_params.extend(["xobj"])

    def add_collection(self, collection):
        """Record the collection id and titles, and derive ``dt``/``classname``."""
        self.data["collection_id"] = collection.id

        self.data.setdefault("collection_title_tex", []).append(collection.title_tex)
        self.data.setdefault("collection_title_html", []).append(collection.title_html)

        # classname is used only by PCJ for the article types.
        # Journals and acta only set the document type.
        dt_only = {"journal": "Article de revue", "acta": "Acte de séminaire"}
        # These collection types set both classname and dt; others are "Livre".
        dt_and_classname = {
            "thesis": "Thèse",
            "lecture-notes": "Notes de cours",
            "proceeding": "Acte de rencontre",
        }

        coltype = collection.coltype
        if coltype in dt_only:
            self.data["dt"] = [dt_only[coltype]]
        else:
            label = dt_and_classname.get(coltype, "Livre")
            self.data["classname"] = label
            self.data["dt"] = [label]

    def add_abstracts_to_data(self):
        """Copy each abstract's tex/html values; non-"fr" ones get a trans_ prefix."""
        for abstract in self.xobj.abstracts:
            prefix = "" if abstract["lang"] == "fr" else "trans_"

            for flavour in ("tex", "html"):
                self.data[f"{prefix}abstract_{flavour}"] = abstract[f"value_{flavour}"]

    def add_year_to_data(self, year):
        """Set ``year_facet``; for a "first-last" range the second part is used."""
        if not year:
            return

        parts = str(year).split("-")
        self.data["year_facet"] = int(parts[1]) if len(parts) > 1 else int(year)

    def pre_do(self):
        """Build the Solr document from xobj (and db_obj when available).

        Raises:
            ValueError: if no ``dt`` has been set in the document.
        """
        super().pre_do()

        for name in self.fields:
            if hasattr(self.xobj, name):
                self.data[name] = getattr(self.xobj, name)

        self.add_abstracts_to_data()
        solr_add_kwds_to_data(self.xobj.kwds, self.data)
        solr_add_contributors_to_data(self.xobj.contributors, self.data)

        if "dt" not in self.data:
            raise ValueError(f"add SolR resource without dt - {self.xobj.pid}")

        # year either comes directly from xobj (container) or from set_container
        self.add_year_to_data(self.data["year"])

        if self.db_obj is not None:
            # Solr field that receives the href of each supported mimetype
            field_by_mimetype = {
                "application/pdf": "pdf",
                "image/x.djvu": "djvu",
                "application/x-tex": "tex",
            }
            for stream in self.xobj.streams:
                mimetype = stream["mimetype"]
                if mimetype not in field_by_mimetype:
                    continue
                self.data[field_by_mimetype[mimetype]] = (
                    self.db_obj.get_binary_file_href_full_path(
                        "self", mimetype, stream["location"]
                    )
                )

            self.data["wall"] = self.db_obj.get_wall()

        # Fall back to the current site when no explicit sites were given.
        self.data["sites"] = self.sites or [settings.SITE_ID]
317#####################################################################
318#
319# addContainerSolrCmd: adds/remove a container (issue/book)
320#
321# A container needs a collection (collection_title_tex etc.)
322#
323######################################################################
class addContainerSolrCmd(addResourceSolrCmd):
    """Add/remove a container (issue/book) in Solr.

    In addition to the base resource fields, ``ctype`` is copied from xobj.
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(["ctype"])
        # self.data["dt"] = ["Livre"]

    def pre_do(self):
        """Add the integer volume/number/vseries values to the document."""
        super().pre_do()

        for field in ["volume", "number", "vseries"]:
            if hasattr(self.xobj, field):
                # Each value goes to its own key. Previously all three were
                # written to "volume", so "number" and "vseries" were never
                # set and "volume" held the value of the last field present.
                self.data[field] = make_int(getattr(self.xobj, field))

        if hasattr(self.xobj, "incollection") and len(self.xobj.incollection) > 0:
            # With an incollection, its series and volume replace the values
            # above: the document volume is 0 and the number is its volume.
            incol = self.xobj.incollection[0]
            self.data["vseries"] = make_int(incol.vseries)
            self.data["volume"] = 0
            self.data["number"] = make_int(incol.volume)

            # if incol.coltype == "theses":
            #     self.data["dt"] = ["Thèse"]
348#####################################################################
349#
350# addArticleSolrCmd: adds/remove an article
351#
352# an article needs a container (container_id) that needs a collection (collection_id)
353#
354######################################################################
class addArticleSolrCmd(addResourceSolrCmd):
    """Add/remove an article in Solr.

    An article needs a container (container_id), which needs a collection
    (collection_id).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_id", "volume", "number", "vseries", "article_number"]
        )
        # self.data["dt"] = ["Article"]

    def set_container(self, container):
        """Copy the container id, year and integer series/volume/number."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["vseries"] = make_int(container.vseries)
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)

    def set_eprint(self, eprint):
        """Tag the document as an e-print (``dt`` must already exist)."""
        self.data["dt"].append("e-print")

    def set_source(self, source):
        """Do nothing."""
        pass

    def set_thesis(self, thesis):
        """Tag the document as a thesis (``dt`` must already exist)."""
        self.data["dt"].append("thesis")

    def set_original_article(self, article):
        """Do nothing for now."""
        # TODO Replace some data (ie doi, pid) with the original article
        pass

    def pre_do(self):
        """Set ``classname`` and a ``page_range`` that starts with "p"."""
        super().pre_do()

        self.data["classname"] = resolver.ARTICLE_TYPES.get(
            self.xobj.atype, "Article de recherche"
        )

        page_range = self.xobj.page_range
        if not page_range:
            # No explicit range: build "p. <fpage>-<lpage>" from what exists.
            self.data["page_range"] = "p. "
            if self.xobj.fpage is not None:
                self.data["page_range"] += self.xobj.fpage
            if self.xobj.fpage and self.xobj.lpage:
                self.data["page_range"] += "-"
            if self.xobj.lpage is not None:
                self.data["page_range"] += self.xobj.lpage
        elif page_range[0] != "p":
            self.data["page_range"] = "p. " + page_range
        else:
            # A range that already starts with "p" is kept as is. It used to
            # be replaced by an empty string, losing the page information.
            self.data["page_range"] = page_range
404#####################################################################
405#
406# addBookPartSolrCmd: adds/removes a book part (similar to an article)
407#
408# a book part needs a collection id (array)
409#
410######################################################################
class addBookPartSolrCmd(addResourceSolrCmd):
    """Add/remove a book part (similar to an article) in Solr.

    A book part needs a collection id (array).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_title_tex", "container_title_html", "volume", "number"]
        )
        # self.data["dt"] = ["Chapitre de livre"]

    def set_container(self, container):
        """Copy the container id, year, integer volume/number and titles."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)
        self.data["container_title_tex"] = container.title_tex
        self.data["container_title_html"] = container.title_html

    def pre_do(self):
        """Set ``classname`` and a ``page_range`` that starts with "p"."""
        super().pre_do()

        self.data["classname"] = "Chapitre de livre"

        page_range = self.xobj.page_range
        if not page_range:
            # No explicit range: build "p. <fpage>-<lpage>" from what exists.
            self.data["page_range"] = "p. "
            if self.xobj.fpage is not None:
                self.data["page_range"] += self.xobj.fpage
            if self.xobj.fpage and self.xobj.lpage:
                self.data["page_range"] += "-"
            if self.xobj.lpage is not None:
                self.data["page_range"] += self.xobj.lpage
        elif page_range[0] != "p":
            self.data["page_range"] = "p. " + page_range
        else:
            # A range that already starts with "p" is kept as is. It used to
            # be replaced by an empty string, losing the page information.
            self.data["page_range"] = page_range
446#####################################################################
447#
448# solrSearchCmd:
449#
450# called from ptf/views.py; SolrRequest(request, q, alias=alias,
451# site=site,
452# default={'sort': '-score'})
453#
454# Warning: As of July 2018, only 1 site id is stored in a SolR document
455# Although the SolR schema is already OK to store multiple sites ("sites" is an array)
456# no Solr commands have been written to add/remove sites
457# We only have add commands.
458# Search only works if the Solr instance is meant for individual or ALL sites
459#
460######################################################################
class solrSearchCmd(solrCmd):
    """Run a user search (edismax, with facets and highlighting) in Solr.

    ``qs`` (required) is a list of dicts with the keys "name", "value",
    "not", "first" and "last"; each one becomes a clause of the query.
    ``internal_do`` returns a ``search_helpers.SearchResults``.
    """

    def __init__(self, params={}):
        # self.q = '*:*'
        self.q = ""
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = "-score"  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""

        super().__init__(params)

        self.required_params.extend(["qs"])

    def get_q(self, name, value, exclude, first, last):
        """Build one Solr query clause.

        Args:
            name: criterion name ("all", "date", "author", "author_ref",
                "title", "body", "references", "kwd"); any other name adds
                no field prefix.
            value: searched text; "*" alone means "no text".
            exclude: when truthy, the clause is negated with a leading "-".
            first: lower bound of the year range (used when name is "date").
            last: upper bound of the year range (used when name is "date").

        Returns:
            The clause as a string.
        """
        if name == "all" and value == "*":
            return "*:*"

        if value == "*":
            value = ""

        q = "-" if exclude else ""

        if name == "date":
            return q + "year:[" + first + " TO " + last + "]"

        # Solr field targeted by each criterion
        field_prefixes = {
            "author": "au:",
            "author_ref": "ar:",
            "title": "title_tex:",
            "body": "body:",
            "references": "bibitem:",
            "kwd": "kwd:",
        }
        q += field_prefixes.get(name, "")

        if len(value) > 0 and value[0] == '"' and value[-1] == '"':
            # Quoted phrase: passed through unchanged
            q += value
        else:
            # new_terms = [ "*{}*".format(t for t in terms)]
            q += "(" + " AND ".join(value.split()) + ")"

        return q

    def internal_do(self) -> search_helpers.SearchResults:
        """Build the query, filters, sort and facets, then query Solr."""
        super().internal_do()

        if settings.COLLECTION_PID == "CR":
            # The CR sites are selected as a range of site ids.
            cr_ids = ["CRMATH", "CRMECA", "CRPHYS", "CRCHIM", "CRGEOS", "CRBIOL"]
            ids = [SITE_REGISTER[item.lower()]["site_id"] for item in cr_ids]
            self.filters.append(f"sites:[{min(ids)} TO {max(ids)}]")
        elif settings.COLLECTION_PID != "ALL":
            self.filters.append(f"sites:{settings.SITE_ID}")

        sort = "score desc"
        if self.sort:
            sorts = []
            for spec in self.sort.split(","):
                spec = spec.strip()
                if not spec:
                    # An empty component (trailing or doubled comma) used to
                    # raise IndexError on spec[0]; it is now skipped.
                    continue
                if spec.startswith("-"):
                    sorts.append(f"{spec[1:]} desc")
                else:
                    sorts.append(f"{spec} asc")
            if sorts:
                sorts.append("year desc")
                sort = ", ".join(sorts)

        use_ar_facet = True
        q = ""
        if self.qs:
            for qi in self.qs:
                if qi["name"] == "author_ref":
                    # The "ar" facet is not requested when searching on it.
                    use_ar_facet = False
                if qi["value"] or qi["first"]:
                    new_q = self.get_q(qi["name"], qi["value"], qi["not"], qi["first"], qi["last"])
                    q += new_q + " "
        if q:
            self.q = q

        facet_fields = ["collection_title_facet", "msc_facet", "dt", "year_facet"]

        if use_ar_facet:
            facet_fields.append("ar")

        if settings.COLLECTION_PID == "CR":
            facet_fields.append("sites")
        elif settings.COLLECTION_PID == "PCJ":
            facet_fields.append("classname")

        params = {
            "q.op": "AND",
            "sort": sort,
            "facet.field": facet_fields,
            # Decades are built manually because we allow the user to
            # expand a decade and see individual years
            "facet.range": "year_facet",
            "f.year_facet.facet.range.start": 0,
            "f.year_facet.facet.range.end": 3000,
            "f.year_facet.facet.range.gap": 10,
            "facet.mincount": 1,
            "facet.limit": 100,
            "facet.sort": "count",
            # 'fl': '*,score',  # for debugging
            # 'debugQuery': 'true',  # for debugging
            "hl": "true",
            # 'hl.fl': "*", -> by default, returns the fields of qf
            "hl.snippets": 1,
            "hl.fragsize": 300,
            "hl.simple.pre": "<strong>",
            "hl.simple.post": "</strong>",
            "defType": "edismax",
            # Without "tie", the score is the max of the per-field scores;
            # here 0.1 x the score of the other fields is added.
            "tie": 0.1,
            # "df": 'text',  Not used with dismax queries
            # We want to retrieve the highlights in both _tex and _html.
            # We need to specify the 2 in qf
            "qf": [
                "au^21",
                "title_tex^13",
                "title_html^13",
                "trans_title_tex^13",
                "trans_title_html^13",
                "abstract_tex^8",
                "trans_abstract_tex^8",
                "kwd^5",
                "trans_kwd^5",
                "collection_title_html^3",
                "collection_title_tex^3",
                "body^2",
                "bibitem",
            ],
            # The "ar" field is multivalued and dedicated to facets; the
            # "au" field is used for searching and for displaying results.
        }

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        search_results = search_helpers.SearchResults(
            solr_results, self.search_path, self.filters, use_ar_facet
        )

        return search_results
624#####################################################################
625#
626# solrInternalSearchCmd:
627#
628# called from ptf/views.py/book by author
629#
630######################################################################
class solrInternalSearchCmd(solrCmd):
    """Run an internal (non user-typed) Solr search with a ready-made ``q``.

    ``q`` is required. ``internal_do`` returns a
    ``search_helpers.SearchInternalResults`` when ``create_facets`` is truthy
    and the raw ``pysolr.Results`` otherwise.
    """

    def __init__(self, params={}):
        self.q = "*:*"
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = None  # '-score'  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""
        self.facet_fields = []
        self.facet_limit = 100
        self.fl = None
        self.create_facets = True
        # 10/03/2023 - UNUSED
        self.related_articles = False

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self) -> search_helpers.SearchInternalResults | pysolr.Results:
        """Translate the facet names, build the parameters and query Solr."""
        super().internal_do()

        # 10/03/2023 - UNUSED
        if self.site:
            # This used to append to self.fq, an attribute this class never
            # defines (AttributeError). The filter queries sent to Solr as
            # "fq" are held in self.filters.
            self.filters.append(f"sites:{self.site}")

        the_facet_fields = []
        use_year_facet = False
        for field in self.facet_fields:
            if field == "firstLetter":
                the_facet_fields.append("{!ex=firstletter}firstNameFacetLetter")
            elif field == "author_facet":
                the_facet_fields.append("ar")
            else:
                the_facet_fields.append(field)

            if field == "year_facet":
                use_year_facet = True

        # 10/03/2023 - UNUSED
        if self.related_articles:
            params = {
                "q.op": "OR",
                "hl": "true",
                "hl.fl": "title_tex, trans_title_tex, trans_kwd, kwd",
                "hl.snippets": 1,
                "hl.fragsize": 0,
                "hl.simple.pre": "<strong>",
                "hl.simple.post": "</strong>",
                # "hl.method": "unified"
            }
        else:
            params = {
                "q.op": "AND",
                # 'fl': '*,score',  # for debugging
                # 'debugQuery': 'true',  # for debugging
                "facet.field": the_facet_fields,
                # ["{!ex=firstletter}firstNameFacetLetter", 'year_facet', 'collection_title_facet'],
                "facet.mincount": 1,
                "facet.limit": self.facet_limit,
                "facet.sort": "index",
            }

        # NOTE(review): the nesting of this block was not recoverable from the
        # reviewed text; it is applied to both branches above - confirm.
        if use_year_facet:
            # Decades are built manually because we allow the user to expand a
            # decade and see individual years
            params.update(
                {
                    "facet.range": "year_facet",
                    "f.year_facet.facet.range.start": 0,
                    "f.year_facet.facet.range.end": 3000,
                    "f.year_facet.facet.range.gap": 10,
                }
            )

        if self.sort:
            params["sort"] = self.sort

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        if self.fl:
            params["fl"] = self.fl

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)
        results = solr_results

        if self.create_facets:
            results = search_helpers.SearchInternalResults(
                solr_results, self.search_path, self.filters, self.facet_fields
            )

        return results
734#####################################################################
735#
736# solrGetDocumentByPidCmd:
737#
738#
739######################################################################
class solrGetDocumentByPidCmd(solrCmd):
    """Fetch the Solr document of a given ``pid`` (required)."""

    def __init__(self, params={}):
        self.pid = None

        super().__init__(params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """Return the first document matching ``pid:<pid>``, or None."""
        super().internal_do()

        response = solrFactory.get_solr().search("pid:" + self.pid)
        if response is None:
            return None

        found = response.docs
        return found[0] if found else None
class updateResourceSolrCmd(solrAddCmd):
    """Re-index an existing resource with some of its fields replaced.

    ``params`` holds the values to override. ``pre_do`` merges them over the
    document currently stored in Solr for the resource's pid.
    """

    def __init__(self, params=None):
        self.resource = None

        super().__init__(params)
        self.params = params

    def set_resource(self, resource):
        """Remember the resource and take its id and pid."""
        self.resource = resource
        self.id = resource.id
        self.pid = resource.pid

    def pre_do(self):
        """Rebuild ``data`` from the stored document overridden by ``params``."""
        doc = solrGetDocumentByPidCmd({"pid": self.pid}).do()
        if doc:
            # params defaults to None; unpacking None with ** used to raise
            # TypeError, so a missing params is treated as "no override".
            self.data = {**doc, **(self.params or {})}
            # The stored "_version_" value is not sent back to Solr.
            self.data.pop("_version_", None)
            if "contributors" in self.data:
                # Contributors are expanded into the author fields and the
                # raw list is removed from the document.
                solr_add_contributors_to_data(self.data["contributors"], self.data)
                self.data.pop("contributors")
            # if 'kwd_groups' in self.data:
            #     solr_add_kwd_groups_to_data(self.data['kwd_groups'], self.data)
            #     self.data.pop('kwd_groups')
        # NOTE(review): the nesting of this call was not recoverable from the
        # reviewed text; it is assumed to run whether or not a document was
        # found - confirm.
        super().pre_do()
def research_more_like_this(article):
    """Query Solr's MoreLikeThis for documents similar to ``article``.

    Returns a dict that always has "docs" (empty when the article has no
    Solr document). When the article is found it also has "params" (the
    sorted request parameters plus "min_score"), "numFound",
    "interestingTerms" and "explain".
    """
    results = {"docs": []}
    doc = solrGetDocumentByPidCmd({"pid": article.pid}).do()
    if not doc:
        return results

    # fields = "au,kwd,trans_kwd,title_tex,trans_title_tex,abstract_tex,trans_abstract_tex,body"
    fields = getattr(settings, "MLT_FIELDS", "all")
    boost = getattr(settings, "MLT_BOOST", "true")
    # The default threshold depends on whether boosting is enabled.
    min_score = getattr(settings, "MLT_MIN_SCORE", 80 if boost == "true" else 40)

    pid_prefix = article.pid.split("_")[0]
    if pid_prefix[:2] == "CR":
        # search suggested articles in all CR
        pid_filter = r"pid:/CR.*/"
    else:
        pid_filter = f"pid:/{pid_prefix}.*/"

    params = {
        "debugQuery": "true",
        "mlt.interestingTerms": "details",
        "mlt.boost": boost,
        "fl": "*,score",
        "mlt.minwl": 4,
        "mlt.maxwl": 100,
        "mlt.mintf": 2,
        "mlt.mindf": 2,
        "mlt.maxdfpct": 1,
        "mlt.maxqt": 50,
        # "mlt.qf": "trans_kwd^90 title_tex^80 body^1.7",
        "fq": pid_filter,
    }

    doc_query = f'id:{doc["id"]}'
    similar = solrFactory.get_solr().more_like_this(q=doc_query, mltfl=fields, **params)

    # The reported parameters also include the query, the fields and the
    # score threshold, which were not part of the keyword arguments above.
    params.update({"q": doc_query, "mlt.fl": fields})
    params.update({"min_score": min_score})

    results["params"] = dict(sorted(params.items()))
    results["docs"] = similar.docs
    results["numFound"] = similar.raw_response["response"]["numFound"]
    results["interestingTerms"] = similar.raw_response["interestingTerms"]
    results["explain"] = similar.debug["explain"]
    return results
def is_excluded_suggested_article(title):
    """Tell whether ``title`` must be left out of the suggested articles.

    A title is excluded when it starts with one of
    settings.MLT_EXCLUDED_TITLES_START or is listed in
    settings.MLT_EXCLUDED_TITLES; a missing setting counts as empty.
    """
    excluded_titles = getattr(settings, "MLT_EXCLUDED_TITLES", [])
    excluded_prefixes = getattr(settings, "MLT_EXCLUDED_TITLES_START", [])

    if title.startswith(tuple(excluded_prefixes)):
        return True
    return title in excluded_titles
def auto_suggest_doi(suggest, article, results=None):
    """Fill ``suggest.doi_list`` from the articles similar to ``article``.

    ``results`` is computed with research_more_like_this() when not given.
    When ``suggest.automatic_list`` is truthy, the DOIs of the first three
    documents scoring above results["params"]["min_score"] are written to
    ``suggest.doi_list``, one per line, skipping duplicates and excluded
    titles. The results are returned in every case.
    """
    results = results or research_more_like_this(article)

    if not (results and suggest.automatic_list):
        return results

    selected = []
    for candidate in results["docs"][:3]:
        if not candidate["score"] > results["params"]["min_score"]:
            continue

        doi = candidate.get("doi", "")
        title = candidate.get("title_tex", "")
        if doi not in selected and not is_excluded_suggested_article(title):
            selected.append(doi)

    suggest.doi_list = "\n".join(selected)
    return results