Coverage for apps/ptf/citedby.py: 68%
479 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
1import concurrent.futures
2import html
3import re
4from collections import defaultdict
5from difflib import SequenceMatcher
7import requests
8import xmltodict
9from bs4 import BeautifulSoup
10from pylatexenc.latex2text import LatexNodes2Text
12from django.conf import settings
14from ptf.bibtex import parse_bibtex
15from ptf.cmds.xml.xml_utils import normalise_span
16from ptf.model_data import RefData
17from ptf.model_data import create_contributor
18from ptf.model_data_converter import update_ref_data_for_jats
19from ptf.model_helpers import get_extid
20from ptf.models import BibItemId
21from ptf.models import get_names
22from ptf.utils import get_display_name
# Base URLs of the external services queried for citation data.
ADS_URL = "https://api.adsabs.harvard.edu/v1/search"
ARXIV_URL = "https://export.arxiv.org/api/query"
CROSSREF_URL = "https://doi.crossref.org/servlet/getForwardLinks"
SEMANTIC_URL = "https://api.semanticscholar.org/v1/paper/"
ZBMATH_URL = "https://zbmath.org"

# Provider labels: stored on each reference as ``ref.provider`` and used as
# keys of PRIORITY below.
ADS = "NASA ADS"
CROSSREF = "Crossref"
SEMANTIC = "Semantic Scholar"
ZBMATH = "zbMATH"

# Timeout handed to the ``requests`` calls; several calls use half of it.
TIMEOUT = 4.0

# Ranking used when the same identifier is reported by several providers:
# the entry with the higher value is kept. Unlisted providers rank 0.
PRIORITY = defaultdict(int, {ZBMATH: 10, ADS: 9, CROSSREF: 8, SEMANTIC: 7})

# Shared LaTeX-to-text converter applied to the rendered citations.
LATEX_PARSER = LatexNodes2Text(math_mode="verbatim")
def create_refdata(lang="und"):
    """Return a fresh ``RefData`` preset for a cited-by entry.

    The reference starts with type ``"misc"`` and with every external
    identifier slot (``doi``, ``arxiv``, ``zbl``, ``semantic``) set to None.

    @param lang: language code forwarded to ``RefData``
    @return: the initialised ``RefData`` instance
    """
    ref = RefData(lang=lang)
    ref.type = "misc"
    # Make sure the identifier attributes always exist, even when unset.
    for identifier in ("doi", "arxiv", "zbl", "semantic"):
        setattr(ref, identifier, None)
    return ref
def is_same_title(compare, titles, tol=0.90):
    """Tell whether ``compare`` closely matches at least one of ``titles``.

    Titles are compared after removing every non-word character and
    lower-casing; two titles match when their ``SequenceMatcher`` ratio is
    strictly greater than ``tol``.

    @param compare: title to look for
    @param titles: iterable of candidate titles
    @param tol: similarity threshold (exclusive)
    @return: True if a candidate matches, False otherwise
    """

    def squash(text):
        return re.sub(r"\W", "", text).lower()

    reference = squash(compare)
    return any(
        SequenceMatcher(None, reference, squash(candidate)).ratio() > tol
        for candidate in titles
    )
def get_zbmath_bibtex(params):
    """Download the BibTeX records linked from a zbMATH search result.

    The search page is fetched with ``params``; for every ``<a href>`` found
    inside its ``<div class="citations">`` block, the matching
    ``/bibtexoutput`` page is fetched and its text collected.

    @param params: query parameters sent to zbMATH
    @return: the concatenated BibTeX text ("" when there is no citations block)
    """
    page = requests.get(
        ZBMATH_URL,
        params=params,
        headers={"Content-Type": "text/html"},
        timeout=0.5 * TIMEOUT,
    )
    citations_block = BeautifulSoup(page.text, "html.parser").find(
        "div", {"class": "citations"}
    )
    if not citations_block:
        return ""

    chunks = []
    for anchor in citations_block.find_all("a", href=True):
        reply = requests.get(
            ZBMATH_URL + "/bibtexoutput" + anchor.get("href", ""),
            headers={"Content-Type": "text/x-bibtex"},
            timeout=0.5 * TIMEOUT,
        )
        reply.encoding = "utf-8"
        chunks.append(reply.text)
    return "".join(chunks)
def citedby_zbmath(metadata):
    """Query zbMATH for the documents citing the described resource.

    The query targets the zbMATH id (``an:``) when ``metadata`` has a
    ``zbl_id``, else the DOI (``en:``), and is OR-ed with a title + authors
    clause.

    @param metadata: dict with ``title``, ``authors`` and ``doi`` or ``zbl_id``
    @return: the result of ``parse_bibtex`` on the downloaded records
    """
    if "zbl_id" in metadata:
        identifier = "an:" + metadata["zbl_id"]
    else:
        identifier = "en:" + metadata["doi"]

    # NOTE(review): the title/author alternative is applied to both
    # identifier forms here - confirm this matches the intended query.
    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    authors = "&au:".join(metadata["authors"])
    query = identifier + "|(ti:" + f'"{title_tex}"' + "&au:" + authors + ")"

    return parse_bibtex(get_zbmath_bibtex({"q": query}))
def citedby_crossref(metadata):
    """Fetch the Crossref forward links (cited-by) of ``metadata["doi"]``.

    @param metadata: dict holding at least the ``doi`` of the resource
    @return: list of ``forward_link`` entries; empty on a non-200 answer or
        when the answer body is empty
    """
    credentials = {
        "usr": settings.CROSSREF_USER,
        "pwd": settings.CROSSREF_PWD,
        "doi": metadata["doi"],
    }
    response = requests.post(CROSSREF_URL, params=credentials, timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []

    parsed = xmltodict.parse(response.text)
    body = parsed["crossref_result"]["query_result"]["body"]
    if not body:
        return []

    # A single forward link is not wrapped in a list by the XML parser.
    links = body["forward_link"]
    return links if isinstance(links, list) else [links]
def get_arxiv_id(metadata):
    """Look up the arXiv identifier of a resource from its DOI or title.

    The arXiv API is asked for a single result matching the DOI or the
    title. The hit is accepted only if its title is close enough to the
    resource title (see ``is_same_title``).

    @param metadata: dict with the ``doi`` and ``title`` of the resource
    @return: the arXiv id without its version suffix, or None if not found
    """
    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    headers = {"Content-Type": "application/atom+xml"}
    query = "doi:" + metadata["doi"] + " OR (ti:" + f'"{title_tex}"' + ")"
    params = {"search_query": query, "max_results": 1}
    response = requests.get(ARXIV_URL, params=params, headers=headers, timeout=0.5 * TIMEOUT)
    if response.status_code != 200:
        return None

    feed = xmltodict.parse(response.text)["feed"]
    if "entry" not in feed:
        return None

    entry = feed["entry"]
    if isinstance(entry, list):
        # Several <entry> elements are parsed as a list: keep the first hit.
        entry = entry[0]
    if not is_same_title(title_tex, [entry["title"]]):
        return None

    identifier = entry["id"].split("arxiv.org/abs/")[-1]
    # Strip only a trailing version marker ("v2"). Splitting on every "v"
    # would truncate old-style ids such as "solv-int/9901001v1".
    return re.sub(r"v\d+$", "", identifier)
def citedby_ads(metadata, by_doi=True, citedby=True):
    """Fetch from NASA ADS the documents citing (or cited by) a resource.

    The resource is identified by its arXiv id, looked up with
    ``get_arxiv_id`` when ``by_doi`` is true and read from
    ``metadata["arxiv_id"]`` otherwise. A first query returns the bibcodes
    of the related documents; a second ("bigquery") returns their metadata.

    @param metadata: dict describing the resource
    @param by_doi: resolve the arXiv id from the DOI/title
    @param citedby: True for citations, False for references
    @return: the ``docs`` of the ADS answer, or [] when nothing is found
    """
    arxiv_id = get_arxiv_id(metadata) if by_doi else metadata["arxiv_id"]
    if not arxiv_id:
        return []

    headers = {"Authorization": f"Bearer:{settings.ADS_TOKEN}"}
    reference = "citation" if citedby else "reference"

    lookup = requests.get(
        ADS_URL + "/query",
        headers=headers,
        params={"q": "identifier:" + arxiv_id, "fl": reference},
        timeout=0.5 * TIMEOUT,
    )
    if lookup.status_code != 200:
        return []

    docs = lookup.json().get("response", {}).get("docs")
    if not (docs and isinstance(docs, list) and reference in docs[0]):
        return []

    # The bigquery endpoint takes the bibcodes as a newline separated body.
    bibcodes = "bibcode\n" + "\n".join(docs[0][reference])
    fields = ",".join(
        (
            "abstract", "author", "bibcode", "comment", "doi", "doctype",
            "eid", "identifier", "issue", "keyword", "orcid_pub",
            "page", "page_count", "page_range", "pub", "pub_raw", "title",
            "volume", "year",
        )
    )
    details = requests.post(
        ADS_URL + "/bigquery",
        params={"q": "*:*", "fl": fields, "rows": 200},
        headers=headers,
        data=bibcodes,
        timeout=0.5 * TIMEOUT,
    )
    details.encoding = "utf-8"
    if details.status_code != 200:
        return []
    return details.json().get("response", {}).get("docs")
def citedby_semantic(metadata, citedby=True):
    """Fetch from Semantic Scholar the citations (or references) of a DOI.

    Nothing is requested when ``settings.SITE_ID`` is 36 (PCJ).

    @param metadata: dict holding the ``doi`` of the resource
    @param citedby: True for citations, False for references
    @return: list of the entries returned by the API, [] otherwise
    """
    reference = "citations" if citedby else "references"
    if settings.SITE_ID == 36:  # all but PCJ
        return []

    response = requests.get(SEMANTIC_URL + metadata["doi"], timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []
    return list(response.json()[reference])
def set_contributors(ref, api_contributors, orcids=None):
    """Convert provider-specific author data and store it on ``ref``.

    The name layout depends on ``ref.provider``: Crossref gives dicts with
    ``given_name``/``surname``, ADS and zbMATH give "Last, First" strings,
    Semantic Scholar gives dicts with a ``name`` in "First ... Last" form.

    @param ref: reference receiving the ``contributors`` attribute
    @param api_contributors: one contributor or a list of contributors
    @param orcids: optional list of ORCIDs, used only when its length
        matches the number of contributors ("-" means no ORCID)
    """
    entries = api_contributors if isinstance(api_contributors, list) else [api_contributors]

    people = []
    for entry in entries:
        given = family = ""
        if ref.provider == CROSSREF:
            given, family = entry.get("given_name"), entry.get("surname")
        elif ref.provider in [ADS, ZBMATH]:
            parts = entry.split(", ")
            if parts:
                family = parts[0]
                given = parts[1] if len(parts) > 1 else ""
        elif ref.provider == SEMANTIC:
            parts = entry["name"].split(" ")
            if parts:
                given, family = " ".join(parts[:-1]), parts[-1]

        person = create_contributor()
        person["first_name"] = given.strip() if given else ""
        person["last_name"] = family.strip() if family else ""
        person["role"] = "author"
        people.append(person)

    if orcids and len(people) == len(orcids):
        for person, orcid in zip(people, orcids):
            person["orcid"] = "" if orcid == "-" else orcid
    ref.contributors = people
def ads_to_bibtex_type(doc_type):
    """Map an ADS ``doctype`` to a BibTeX entry type.

    @param doc_type: document type reported by ADS
    @return: "article" for articles and eprints, the type itself when it is
        one of the BibTeX types listed below, "misc" otherwise
    """
    if doc_type in ("article", "eprint"):
        return "article"

    same_in_bibtex = (
        "book",
        "inbook",
        "inproceedings",
        "mastersthesis",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doc_type if doc_type in same_in_bibtex else "misc"
def crossref_to_bibtex_type(doc_type, item):
    """Map a Crossref forward-link element to a BibTeX entry type.

    @param doc_type: name of the citation element (``journal_cite``,
        ``conf_cite``, ``book_cite``, ...)
    @param item: content of that element; the presence of ``paper_title``
        or ``chapter_title`` selects the "in..." variant
    @return: the BibTeX type, "misc" for any other element
    """
    if doc_type == "journal_cite":
        return "article"
    if doc_type == "conf_cite":
        return "inproceedings" if "paper_title" in item else "proceedings"
    if doc_type == "book_cite":
        return "inbook" if "chapter_title" in item else "book"
    return "misc"
def citedby_crossref_refs(citations):
    """Convert Crossref forward links into ``RefData`` references.

    Each forward link is expected to hold an ``@doi`` attribute plus one
    citation element (``journal_cite``, ``book_cite``, ...) whose name gives
    the document type. The input dicts are left untouched.

    @param citations: list of ``forward_link`` dicts
    @return: list of references tagged with the Crossref provider
    """
    # (Crossref field, reference attribute, skip the field when it is empty)
    # Order matters: ``volume_title`` overrides ``journal_title`` and
    # ``paper_title`` overrides ``article_title``.
    field_map = (
        ("journal_title", "source_tex", True),
        ("article_title", "article_title_tex", True),
        ("volume_title", "source_tex", False),  # book or proceedings title
        ("paper_title", "article_title_tex", True),  # inproceedings title
        ("chapter_title", "chapter_title_tex", True),  # incollection or inbook
        ("first_page", "fpage", False),
        ("last_page", "lpage", False),
        ("volume", "volume", False),
        ("issue", "issue", False),
        ("year", "year", True),
    )

    refdata = []
    for item in citations:
        # Work on a copy without the "@doi" attribute so the caller's data
        # is not consumed and a missing attribute is not an error.
        remaining = {key: value for key, value in item.items() if key != "@doi"}
        if not remaining:
            continue
        doc_type, fields = remaining.popitem()
        fields = fields or {}  # an empty XML element is parsed as None

        ref = create_refdata()
        ref.provider = CROSSREF
        ref.type = crossref_to_bibtex_type(doc_type, fields)
        for key, attribute, skip_empty in field_map:
            if key in fields and (fields[key] or not skip_empty):
                setattr(ref, attribute, fields[key])

        contributors = fields.get("contributors") or {}
        if "contributor" in contributors:
            set_contributors(ref, contributors["contributor"])

        # The DOI element normally carries attributes and is then parsed as
        # a dict holding its text under "#text"; accept a bare string too.
        doi = fields.get("doi")
        if isinstance(doi, dict):
            doi = doi.get("#text")
        if doi:
            ref.doi = doi.lower()
        refdata.append(ref)
    return refdata
def citedby_zbmath_refs(citations):
    """Convert zbMATH citations into references; delegates to ``bibtex_to_refs``."""
    return bibtex_to_refs(citations)
def is_misc(doctype):
    """Tell whether a BibTeX ``doctype`` must be handled as "misc".

    @param doctype: BibTeX entry type
    @return: False for the types listed below, True for anything else
    """
    # "booklet", "manual" and "mastersthesis" are deliberately left out.
    handled = (
        "article",
        "book",
        "conference",
        "inbook",
        "incollection",
        "inproceedings",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doctype not in handled
def bibtex_to_refs(bibitems):
    """Convert parsed BibTeX entries (zbMATH) into ``RefData`` references.

    The input dicts are not modified. Entry types not handled (see
    ``is_misc``), including a missing ``doctype``, are stored as "misc".

    @param bibitems: list of dicts, one per BibTeX entry
    @return: list of references tagged with the zbMATH provider
    """
    refdata = []
    for item in bibitems:
        ref = create_refdata()
        ref.provider = ZBMATH
        raw_type = item.get("doctype")
        doctype = "misc" if is_misc(raw_type) else raw_type
        ref.type = doctype

        # Source: first available of the journal / book title fields.
        for key in ("fjournal", "journal", "booktitle"):
            if key in item:
                ref.source_tex = item[key]
                break
        else:
            if "howpublished" in item:
                # Drop a trailing " (YYYY)" year from the free-text field.
                ref.source_tex = re.sub(r" \([0-9]{4}\)\.?", "", item["howpublished"])

        if "fseries" in item:
            ref.series = item["fseries"]
        elif "series" in item:
            ref.series = item["series"]

        if "title" in item:
            if doctype in ("article", "misc"):
                ref.article_title_tex = item["title"]
            elif doctype in ("incollection", "inproceedings", "inbook"):
                ref.chapter_title_tex = item["title"]
            else:
                ref.source_tex = item["title"]
        if "url" in item and not ref.source_tex:
            ref.source_tex = item["url"]

        if "pages" in item and item["pages"]:
            # Split on runs of separators so that the BibTeX double dash
            # ("12--34") gives two parts, like "12-34".
            pages = [part for part in re.split(r"\W+", item["pages"]) if part]
            if pages:
                ref.fpage = pages[0]
            if len(pages) == 2:
                ref.lpage = pages[1]

        # Plain copies; "issue" overrides "number" when both are present.
        for key, attribute in (
            ("volume", "volume"),
            ("number", "issue"),
            ("issue", "issue"),
            ("note", "comment"),
            ("year", "year"),
        ):
            if key in item:
                setattr(ref, attribute, item[key])

        if "author" in item:
            set_contributors(ref, item["author"].split(" and "))

        for key in ("publisher", "school", "institution"):
            if key in item:
                ref.publisher_name = item[key]
                break
        if "address" in item:
            ref.publisher_loc = item["address"]

        if "doi" in item and item["doi"]:
            ref.doi = item["doi"].lower()
        # "zbl" overrides "zbmath" when both are present.
        for key in ("zbmath", "zbl"):
            if key in item:
                ref.zbl = item[key]
        refdata.append(ref)
    return refdata
def citedby_ads_refs(citations):
    """Convert NASA ADS documents into ``RefData`` references.

    @param citations: list of ADS ``docs`` dicts (``bibcode`` and
        ``doctype`` are required, every other field is optional)
    @return: list of references tagged with the ADS provider
    """
    refdata = []
    for doc in citations:
        ref = create_refdata()
        ref.provider = ADS
        ref.bibcode = doc["bibcode"]
        ref.type = ads_to_bibtex_type(doc["doctype"])

        if doc.get("title"):
            ref.article_title_tex = doc["title"][0]

        if "page_range" in doc:
            bounds = doc["page_range"].split("-")
            if len(bounds) == 2:
                ref.fpage, ref.lpage = bounds
        elif doc.get("page") and doc["page"][0].isdigit():
            ref.fpage = doc["page"][0]
            if doc.get("page_count"):
                # NOTE(review): the last page is set to page_count - 1,
                # not fpage + page_count - 1 - confirm this is intended.
                ref.lpage = str(doc["page_count"] - 1)

        if doc.get("year"):
            ref.year = doc["year"]
        if doc.get("author"):
            set_contributors(ref, doc["author"], doc.get("orcid_pub", []))
        for key in ("issue", "volume"):
            if key in doc:
                setattr(ref, key, doc[key])
        if doc.get("doi"):
            ref.doi = doc["doi"][0].lower()

        if doc.get("eid"):
            arxiv_parts = doc["eid"].split("arXiv:")
            # The eid is used as arXiv id only for arXiv publications.
            if "pub" in doc and "arXiv" in doc["pub"]:
                ref.arxiv = arxiv_parts[-1]
                ref.source_tex = "arXiv"

        if doc.get("pub_raw") and ref.doi and not ref.arxiv:
            # Keep what precedes the volume mention as the source name.
            found = re.match(r"(^.+)?[,.]( vol. | Volume )", doc["pub_raw"])
            if found:
                ref.source_tex = found.group(1)
        elif "pub" in doc and not ref.arxiv:
            ref.source_tex = doc["pub"]

        if doc.get("abstract"):
            ref.abstract = [doc["abstract"]]
        refdata.append(ref)
    return refdata
def citedby_semantic_refs(citations):
    """Convert Semantic Scholar papers into ``RefData`` references.

    @param citations: list of paper dicts returned by the API
    @return: list of references tagged with the Semantic Scholar provider
    """
    refdata = []
    for paper in citations:
        ref = create_refdata()
        ref.provider = SEMANTIC

        if "title" in paper:
            title = paper["title"]
            # Titles written entirely in upper case are toned down.
            if title.isupper():
                title = title.capitalize()
            ref.article_title_tex = title
        if paper.get("year"):
            ref.year = str(paper["year"])
        if paper.get("authors"):
            set_contributors(ref, paper["authors"])
        if paper.get("doi"):
            ref.doi = paper["doi"].lower()
        if paper.get("arxivId"):
            ref.arxiv = paper["arxivId"]
            ref.source_tex = "arXiv"
        # A venue, when given, takes precedence over the "arXiv" source.
        if paper.get("venue"):
            ref.source_tex = paper["venue"]
        if "paperId" in paper:
            ref.semantic = paper["paperId"]
        refdata.append(ref)
    return refdata
def get_extlinks(extids):
    """Build the HTML links of a list of external identifiers.

    @param extids: iterable of ``(id_type, id_value)`` pairs
    @return: list of `` | <a href=...>Label:value</a>`` strings; pairs with
        an unknown ``id_type`` are skipped
    """
    labels = {
        "doi": "DOI:",
        "arxiv": "arXiv:",
        "zbl-item-id": "Zbl:",
        "semantic-scholar": "Semantic-scholar:",
    }

    extlinks = []
    for extid in extids:
        # An unsaved BibItemId is only used to compute the target URL.
        eid = BibItemId()
        eid.id_type, eid.id_value = extid
        label = labels.get(eid.id_type)
        if label is None:
            continue
        text = label + eid.id_value
        extlinks.append(f' | <a href="{eid.get_href()}">{text}</a>')
    return extlinks
def built_extlinks(ref):
    """Store on ``ref.extids`` the identifiers available for ``ref``.

    DOI, arXiv and zbMATH ids are listed in that order. The Semantic
    Scholar id is used only as a fallback, when none of them is set.

    @param ref: reference with ``doi``, ``arxiv``, ``zbl`` attributes and
        an optional ``semantic`` one
    """
    candidates = (
        ("doi", ref.doi),
        ("arxiv", ref.arxiv),
        ("zbl-item-id", ref.zbl),
    )
    extids = [(id_type, value) for id_type, value in candidates if value]
    if not extids and getattr(ref, "semantic", False):
        extids.append(("semantic-scholar", ref.semantic))
    ref.extids = extids
def get_values_for_stats(refs):
    """
    Summarise each reference as a small dict
    @param refs: dict of RefData.__dict__ (completed with "extlinks")
    @return: list of dicts with the keys authors, title, publication_title,
        year, url and source
    """

    stats = []
    for entry in refs.values():
        authors = [
            {
                "author": get_display_name(
                    person["prefix"],
                    person["first_name"],
                    person["last_name"],
                    person["suffix"],
                    person["string_name"],
                )
            }
            for person in entry.get("contributors")
            if person["role"] == "author"
        ]

        title = entry[get_publication_title(entry, "title")]
        publication_title = entry[get_publication_title(entry, "publication_title")]

        # The URL is the target of the first external link, if any.
        url = ""
        if entry["extlinks"]:
            found = re.search(r'href="(.+)">', entry["extlinks"][0])
            if found:
                url = found.group(1)

        stats.append(
            {
                "authors": authors,
                "title": title,
                "publication_title": publication_title,
                "year": entry["year"],
                "url": url,
                "source": entry["provider"],
            }
        )
    return stats
def get_publication_title(ref_item, category="title"):
    """Give the name of the field holding a title of ``ref_item``.

    @param ref_item: dict of a reference; its ``type`` must be set
    @param category: "title" or "publication_title"
    @return: the field name, or None for an unknown ``category``
    """
    # NOTE(review): every type other than a thesis is looked up as "misc",
    # so the incollection / article / book / inbook rows below are never
    # selected - confirm whether this is intended.
    kind = "thesis" if "thesis" in ref_item.get("type") else "misc"

    fields_by_kind = {
        "incollection": {"title": "source_tex", "publication_title": "series"},
        "thesis": {"title": "source_tex", "publication_title": "series"},
        "article": {"title": "article_title_tex", "publication_title": "source_tex"},
        "book": {"title": "source_tex", "publication_title": "series"},
        "inbook": {"title": "chapter_title_tex", "publication_title": "series"},
        "misc": {"title": "article_title_tex", "publication_title": "source_tex"},
    }
    return fields_by_kind.get(kind).get(category)
def built_citations(data):
    """Render, sort and de-duplicate the references of all providers.

    @param data: list of references produced by the ``*_refs`` converters
    @return: tuple ``(citations_html, sources, citedby_for_stats)``: the
        rendered citations, the comma separated names of the providers
        kept, and the output of ``get_values_for_stats``
    """
    # DOI <-> arXiv pairs known from any reference; they are used to
    # complete the references holding only one of the two identifiers.
    doi_arxiv = {ref.doi: ref.arxiv for ref in data if ref.doi and ref.arxiv}
    arxiv_doi = {arxiv: doi for doi, arxiv in doi_arxiv.items()}

    results = []
    for position, ref in enumerate(data):
        if ref.arxiv and not ref.doi:
            ref.doi = arxiv_doi.get(ref.arxiv)
        elif ref.doi and not ref.arxiv:
            ref.arxiv = doi_arxiv.get(ref.doi)
        built_extlinks(ref)
        update_ref_data_for_jats(ref, position, with_label=False)
        ref.citation_html = html.unescape(ref.citation_html)
        results.append(vars(ref))

    def sort_key(entry):
        # Most recent first; references without a year come last.
        year = -int(entry["year"]) if entry["year"] else 0
        return (year, entry["source_tex"], entry["volume"], entry["issue"], entry["fpage"])

    results.sort(key=sort_key)

    titles = {
        item[get_publication_title(item)]
        for item in results
        if any((item["arxiv"], item["doi"], item["zbl"]))
    }

    refs = {}

    def keep_best(key, candidate):
        # Keep, for a given identifier, the entry of the best ranked provider.
        if key not in refs or refs[key]["priority"] < candidate["priority"]:
            refs[key] = candidate

    for item in results:
        links = get_extlinks(item["extids"])
        level = PRIORITY[item["provider"]]
        citation = LATEX_PARSER.latex_to_text(item["citation_html"].replace("$$", "$"))
        entry = {
            "html": citation + "".join(links),
            "priority": level,
            "extlinks": links,
            **item,
        }

        if item["doi"]:
            keep_best(item["doi"], entry)
        elif item["zbl"]:
            refs[item["zbl"]] = entry
        elif item["arxiv"]:
            keep_best(item["arxiv"], entry)
        # NOTE(review): doi and arxiv are both empty when this test is
        # reached, so this branch looks unreachable - confirm the intent.
        elif item["semantic"] and (item["doi"] or item["arxiv"]):
            if not is_same_title(item[get_publication_title(item)], titles):
                refs[item["semantic"]] = entry

    sources = ", ".join(sorted({entry["provider"] for entry in refs.values()}))
    citations_html = [entry["html"] for entry in refs.values()]
    return citations_html, sources, get_values_for_stats(refs)
def citations_to_refs(provider, citations):
    """Convert the raw citations of ``provider`` into references.

    @param provider: one of the provider labels (CROSSREF, ZBMATH, ADS,
        SEMANTIC)
    @param citations: raw citations returned by that provider
    @return: list of references, or None for an unknown provider
    """
    converters = {
        CROSSREF: citedby_crossref_refs,
        ZBMATH: citedby_zbmath_refs,
        ADS: citedby_ads_refs,
        SEMANTIC: citedby_semantic_refs,
    }
    convert = converters.get(provider)
    if convert is None:
        return None
    return convert(citations)
def get_citations(resource):
    """Returns documents that cite this doi and sources used for the research.

    Crossref, zbMATH and NASA ADS are queried in parallel. A provider whose
    request times out or cannot connect is ignored.

    @param resource: resource whose citations are searched
    @return: the result of ``built_citations`` on the collected references
    """
    authors = get_names(resource, "author")
    zbl_id = get_extid(resource, "zbl-item-id")
    preprint_id = get_extid(resource, "preprint")

    metadata = {
        "authors": authors,
        "doi": resource.doi,
        "preprint_id": preprint_id.id_value if preprint_id else "",
        "title": resource.title_tex,
    }
    if zbl_id and zbl_id.id_value:
        metadata["zbl_id"] = zbl_id.id_value

    fetchers = (
        (citedby_crossref, CROSSREF),
        (citedby_zbmath, ZBMATH),
        (citedby_ads, ADS),
    )
    found = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        pending = {executor.submit(fetch, metadata): provider for fetch, provider in fetchers}
        for future in concurrent.futures.as_completed(pending):
            provider = pending[future]
            try:
                outcome = future.result()
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                continue
            if outcome:
                found[provider] = outcome

    citations = []
    for provider, cites in found.items():
        citations.extend(citations_to_refs(provider, cites))

    return built_citations(citations)