Coverage for sites/ptf_tools/ptf_tools/doi.py: 19%
245 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
1import os
2from datetime import datetime
4import requests
5from lxml import etree
7from django.conf import settings
8from django.core.exceptions import ObjectDoesNotExist
9from django.template.loader import render_to_string
11from mersenne_tools.models import DOIBatch
12from ptf.display.resolver import find_id_type
def get_doibatch(resource):
    """
    Return the DOIBatch attached to a resource.

    @param resource: object exposing a ``doibatch`` related attribute
    @return: the related doibatch, or None when the lookup raises ObjectDoesNotExist
    """
    try:
        return resource.doibatch
    except ObjectDoesNotExist:
        # no batch has been recorded for this resource
        return None
def get_or_create_doibatch(resource):
    """
    Return the DOI batch of a resource after refreshing its status.

    The DOI is first resolved (without following the redirect). When the
    resolver answers with a 302, the batch is created or updated according to
    whether the redirect target is the resource's absolute URL, then saved and
    returned. Otherwise the existing batch, if any, is checked with
    checkDOIBatch.

    @param resource: models.Resource
    @return: new or updated doibatch (may be None)
    """
    answer = requests.get(settings.DOI_BASE_URL + resource.doi, allow_redirects=False)

    # (status, log) to store when the DOI resolver redirected us somewhere
    verdict = None
    if answer.status_code == 302:
        if resource.get_url_absolute() == answer.headers["Location"]:
            verdict = ("Enregistré", "Vérifié sur CROSSREF")
        else:
            verdict = ("Erreur", "Mauvaise URL pour le DOI !!!/à réenregistrer")

    batch = get_doibatch(resource)

    if verdict is not None:
        status, log = verdict
        if batch:
            batch.status = status
            batch.log = log
        else:
            batch = DOIBatch(resource=resource, status=status, log=log)
        batch.save()
        return batch

    if not batch:
        return batch

    # Book parts are registered at the container level: the batch linked to a
    # book part cannot be queried, it was only created to display "En cours"
    # at the book-part level.
    if resource.classname == "Article" and resource.my_container.ctype.startswith("book"):
        batch.delete()
        return None

    return checkDOIBatch(batch)
# recordDOI per resource (article)
# Issue with registering DOIs at CROSSREF:
# - to register a DOI, the journal's DOI is used as a reference: CROSSREF treats that as a request to
#   register/modify the journal's DOI as well...
# When several requests are sent one after another (Record all DOIs), they are processed in an order that
# differs (randomly) from the sending order, which produces errors such as:
#   "Record not processed because submitted version: 201810150907372216 is less or equal to previously submitted version {1}"
# (BUT the record involved here is the journal's one; the article's record generally causes no problem)
# because every request carries a timestamp.
#
# To work around these errors (previously only the failure_count was inspected, and so there was one),
# the returned XML has to be interpreted:
#   <record_diagnostic status="Success">
#     <doi>10.5802/alco.21</doi>
#     <msg>Successfully updated</msg>
# This is what is taken into account (in checkDOIBatch).
#
def _append_record_doi_log(message):
    """Append ``message`` as one line to record_doi.log in settings.LOG_DIR."""
    log_path = os.path.join(settings.LOG_DIR, "record_doi.log")
    with open(log_path, "a", encoding="utf-8") as file_:
        file_.write(message + "\n")


def _single_year(year):
    """Return ``year`` if it parses as a plain year, else the 2nd part of a range."""
    try:
        datetime.strptime(year, "%Y")
    except ValueError:
        # the value is assumed to look like 2010-2011: keep the 2nd year of the range
        return year.split("-")[1]
    return year


def recordDOI(resource, testing=False):
    """
    Render the CROSSREF deposit XML of a resource and submit it.

    Any existing DOIBatch of the resource is replaced by a new one with status
    "En cours"; the rendered XML is stored on that batch. Unless ``testing`` is
    set, the XML is then posted to CROSSREF and the batch is flagged "Erreur"
    when the HTTP status is not 200.

    @param resource: Article, Container (book or issue) or TranslatedArticle
    @param testing: Boolean set to True when testing (nothing is sent to CROSSREF)
    @return: data {'status': 200 if CROSSREF answered 200 with a "SUCCESS" title,
             404 otherwise, 'message': start of the CROSSREF answer (only when
             the request was sent)}
    """

    doibatch = get_doibatch(resource)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=resource, status="En cours")
    doibatch.save()

    context = {}
    context["doi_batch_id"] = f"{doibatch.pk:04d}"
    # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#timestamp
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")  # len = 20, must be 19
    context["timestamp"] = timestamp[0:19]
    context["mail"] = settings.CROSSREF_MAIL
    template = f"crossref/{resource.classname.lower()}_doi_register.xml"

    # date layout expected by CROSSREF
    full_date_format = "<month>%m</month><day>%d</day><year>%Y</year>"
    is_translation = resource.classname == "TranslatedArticle"

    # hack to work out the publication date of a resource
    if resource.classname == "Article":
        # an article without contributors is registered as a posted-content of type other
        # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#posted_content
        if not resource.get_author_contributions():
            template = "crossref/posted-content.xml"

        if resource.date_published:
            resource.DOIdate = resource.date_published.strftime(full_date_format)

            # the container year is checked as well
            container = resource.my_container
            year = _single_year(container.year)
            if year != container.year:
                container.year = year
        elif resource.date_online_first:
            # Online First
            # TODO: Is it possible to send 2 dates to Crossref ?
            # You can send multiple <publication_date> but it is for multiple media_type (print vs online)
            resource.DOIdate = resource.date_online_first.strftime(full_date_format)
        else:
            # no date on the article: extrapolate it from the volume
            container = resource.my_container
            year = _single_year(container.year)
            resource.DOIdate = "<year>%s</year>" % year
            if year != container.year:
                container.year = year

    elif resource.classname == "Container":
        if resource.ctype.startswith("book"):
            # NB: book chapters are not handled here, everything is done in the
            # template when the book is registered.
            # The template depends on the ctype.
            if resource.my_collection.issn or resource.my_collection.e_issn:
                template = "crossref/book_series_metadata.xml"
            else:
                template = "crossref/book_set_metadata.xml"
            # else: standalone book that does not belong to a series
            #     template = book_metadata
            context["book_type"] = resource.ctype[5:].replace("-", "_")

            # Each book part gets its own "En cours" batch. A separate name is
            # used so that ``doibatch`` keeps pointing at the container batch
            # whose id was put in the context above.
            for bookpart in resource.article_set.all():
                part_batch = get_doibatch(bookpart)
                if part_batch:
                    part_batch.delete()
                part_batch = DOIBatch(resource=bookpart, status="En cours")
                part_batch.save()

        elif resource.ctype == "issue":
            # TODO
            template = "issue.xml"
            resource.DOIdate = "<year>%s</year>" % _single_year(resource.year)

    elif is_translation:
        _append_record_doi_log(resource.doi)

        resource.DOIdate = resource.date_published.strftime(full_date_format)
        context["collection"] = resource.original_article.get_top_collection()

    context["resource"] = resource

    preprint_id = preprint_type = None
    extid = resource.extid_set.filter(id_type="preprint").first()
    if extid is not None:
        preprint_id = extid.id_value
        preprint_type = find_id_type(preprint_id)
        # crossref allows "doi" and "arxiv", but not "hal"
        if preprint_type == "hal":
            preprint_type = "other"
    context["preprint_id"] = preprint_id
    context["preprint_type"] = preprint_type

    rdoi = None
    rdoi_extid = resource.extid_set.filter(id_type="rdoi").first()
    if rdoi_extid is not None:
        rdoi = rdoi_extid.id_value
    context["rdoi"] = rdoi

    try:
        xml = render_to_string(template_name=template, context=context)
        doibatch.xml = xml
        doibatch.save()
    except Exception as e:
        if is_translation:
            _append_record_doi_log(str(e))
        raise

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        if is_translation:
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.original_article)
            _append_record_doi_log("Call crossref")
        elif resource.classname == "Container" and resource.ctype.startswith("book"):
            # no DOI at the container level: the CROSSREF credentials are taken
            # from the first book part
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.article_set.first())
        else:
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource)

        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            answer = etree.XML(body)
            title = answer.xpath("//*/title")[0].text
            if title == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # The cut is made on bytes and may fall inside a multi-byte character:
        # drop the incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

    if is_translation:
        _append_record_doi_log(doibatch.status)
    return data
def get_user_pwd_crossref(resource):
    """
    Get the CROSSREF credentials from the DOI prefix of a resource.

    The settings CROSSREF_USER_<n> and CROSSREF_PWD_<n> are read, where <n> is
    the second dot-separated part of the DOI prefix (the text before the first
    "/"). If either setting is missing, the 5802 credentials are used.

    @param resource: object with a ``doi`` attribute
    @return: (crossref_user, crossref_pwd)
    """
    registrant = resource.doi.split("/")[0].split(".")[1]
    try:
        return (
            getattr(settings, f"CROSSREF_USER_{registrant}"),
            getattr(settings, f"CROSSREF_PWD_{registrant}"),
        )
    except AttributeError:
        return settings.CROSSREF_USER_5802, settings.CROSSREF_PWD_5802
def checkDOIBatch(doibatch):
    """
    Check a DOI batch status by HTTP request and store the outcome on the batch.

    @param doibatch: DOIBatch
    @return: DOIBatch with status and log updated (the batch is saved)
    """

    resource = doibatch.resource
    crossref_user, crossref_pwd = get_user_pwd_crossref(resource)
    # The URL template is filled in two passes: %-formatting for the
    # credentials, then str.format for the batch primary key.
    url = settings.CROSSREF_BASE_CHECKBATCH_URL_TPL % (crossref_user, crossref_pwd)
    url = url.format(doibatch.pk)
    r = requests.get(url)
    if r.status_code == 200:
        # parse the returned XML
        dataXml = r.text.encode("utf8")
        tree = etree.XML(dataXml)
        elem = tree.xpath("/doi_batch_diagnostic")[0]
        batch_status = elem.attrib["status"]
        if batch_status == "completed":
            # the batch has been processed
            # default outcome, kept when the batch holds no record_diagnostic
            doibatch.status = "batch terminé"
            doibatch.log = "Pas de DOI associé dans le batch : voir le xml"
            diags = tree.xpath("//*/record_diagnostic")
            for diag in diags:
                doi = diag.xpath("doi")[0].text
                log = diag.xpath("msg")[0].text
                status = diag.attrib["status"]
                # NOTE(review): a record whose DOI differs from the resource's
                # also sets "Erreur", so the stored status/log are those of the
                # last record_diagnostic processed - confirm this is intended.
                if doi == doibatch.resource.doi:
                    if status == "Success":
                        doibatch.status = "Enregistré"
                    else:
                        doibatch.status = "Erreur"
                else:
                    doibatch.status = "Erreur"
                doibatch.log = log

        elif batch_status == "in_process" or batch_status == "queued":
            doibatch.status = "En cours"
            doibatch.log = "batch en cours de traitement"
        else:  # refreshed too early after Record DOI
            doibatch.status = "Erreur"
            doibatch.log = (
                "Attention, il se peut qu'il faille rafraichir "
                "un peu plus tard {} ".format(r.text)
            )
    else:
        # any non-200 answer is stored verbatim as the error log
        doibatch.status = "Erreur"
        doibatch.log = r.text
    doibatch.save()
    return doibatch
def removeOldDataInCrossref(article, testing=False):
    """
    The CRAS 2002-2019 articles were registered by Elsevier
    To remove some metadata in Crossref, we need to provide a separate XML with the fields to remove

    Any existing DOIBatch of the article is replaced by a new one with status
    "En cours" holding the rendered XML. Unless ``testing`` is set, the XML is
    posted to CROSSREF and the batch is flagged "Erreur" when the HTTP status
    is not 200.

    @param article:
    @param testing: Boolean set to True when testing (the XML is printed, nothing is sent)
    @return: data {'status': 200 if CROSSREF answered 200 with a "SUCCESS" title,
             404 otherwise, 'message': start of the CROSSREF answer (only when
             the request was sent)}
    """

    doibatch = get_doibatch(article)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=article, status="En cours")
    doibatch.save()

    context = {"resource": article, "doi_batch_id": f"{doibatch.pk:04d}"}

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")  # len = 20, must be 19
    context["timestamp"] = timestamp[0:19]

    context["mail"] = settings.CROSSREF_MAIL
    template = "crossref/article_remove_old_data.xml"

    if article.date_published:
        article.DOIdate = article.date_published.strftime(
            "<month>%m</month><day>%d</day><year>%Y</year>"
        )

    # rendering errors simply propagate to the caller
    xml = render_to_string(template_name=template, context=context)

    if testing:
        print(xml)

    doibatch.xml = xml
    doibatch.save()

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        crossref_user, crossref_pwd = get_user_pwd_crossref(article)
        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            answer = etree.XML(body)
            title = answer.xpath("//*/title")[0].text
            if title == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # The cut is made on bytes and may fall inside a multi-byte character:
        # drop the incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

    return data