Coverage for sites/ptf_tools/ptf

1import os

2from datetime import datetime

4import requests

5from lxml import etree

7from django.conf import settings

8from django.core.exceptions import ObjectDoesNotExist

9from django.template.loader import render_to_string

11from mersenne_tools.models import DOIBatch

12from ptf.display.resolver import find_id_type

def get_doibatch(resource):
    """
    Return the DOIBatch attached to a resource, if any.

    @param resource: object exposing a ``doibatch`` attribute
    @return: ``resource.doibatch``, or None when reading it raises ObjectDoesNotExist
    """
    try:
        return resource.doibatch
    except ObjectDoesNotExist:
        return None

def get_or_create_doibatch(resource):
    """
    Refresh (or create) the DOIBatch of a resource.

    The DOI is first requested at settings.DOI_BASE_URL + resource.doi without
    following redirects. When the answer is a 302, the batch is created or
    updated immediately and returned:
      - "Enregistré" if the redirect target equals resource.get_url_absolute()
      - "Erreur" otherwise (the DOI points to another URL)
    For any other answer, the existing batch (if any) is refreshed through
    checkDOIBatch.

    @param resource: models.Resource
    @return: new or updated doibatch, or None when there is nothing to report
    """
    # check DOI
    url = settings.DOI_BASE_URL + resource.doi
    r = requests.get(url, allow_redirects=False)

    if r.status_code == 302:
        if resource.get_url_absolute() == r.headers["Location"]:
            status = "Enregistré"
            log = "Vérifié sur CROSSREF"
        else:
            status = "Erreur"
            log = "Mauvaise URL pour le DOI !!!/à réenregistrer"

        doibatch = get_doibatch(resource)
        if doibatch:
            doibatch.status = status
            doibatch.log = log
        else:
            doibatch = DOIBatch(resource=resource, status=status, log=log)
        doibatch.save()
        return doibatch

    doibatch = get_doibatch(resource)

    # For a book-part the registration is done at container level, so the batch
    # linked to the book-part cannot be queried: it was only created to display
    # "En cours" at book-part level.
    if (
        doibatch
        and resource.classname == "Article"
        and resource.my_container.ctype.startswith("book")
    ):
        doibatch.delete()
        doibatch = None

    if doibatch:
        doibatch = checkDOIBatch(doibatch)

    return doibatch

# recordDOI works per resource (article).
# Known issue when registering DOIs with CROSSREF:
# - to register a DOI, the journal DOI is used as the reference: CROSSREF treats this as a
#   registration/modification request for the journal DOI too.
# When several requests are sent one right after another (Record all DOIs), they are processed
# in a different (random) order than the order in which they were sent, which yields errors like:
# "Record not processed because submitted version: 201810150907372216 is less or equal to previously submitted version {1}"
# (BUT the record involved here is the journal's; the article's record is generally fine),
# because every request carries a timestamp.
#
# To work around these errors (previously only failure_count was inspected, and so there was
# always one failure), the returned XML has to be interpreted:
# <record_diagnostic status="Success">
#   <doi>10.5802/alco.21</doi>
#   <msg>Successfully updated</msg>
# This is what is used (in checkDOIBatch).
#

def _log_record_doi(message):
    """Append ``message`` followed by a newline to record_doi.log in settings.LOG_DIR."""
    with open(os.path.join(settings.LOG_DIR, "record_doi.log"), "a", encoding="utf-8") as file_:
        file_.write(message + "\n")


def _range_end_year(year):
    """
    Return ``year`` unchanged when it parses with "%Y"; otherwise assume a
    "2010-2011" style range and return its second year.
    """
    try:
        datetime.strptime(year, "%Y")
    except ValueError:
        return year.split("-")[1]
    return year


def recordDOI(resource, testing=False):
    """
    Build the Crossref deposit XML of a resource, store it in a fresh DOIBatch
    and, unless ``testing`` is set, post it to Crossref.

    NOTE(review): this function was reconstructed from a listing that had lost
    its indentation. Three nesting choices are the most plausible reading and
    should be confirmed against the repository: the re-raise after a rendering
    failure is unconditional; data["message"] is filled for every Crossref
    answer; the final TranslatedArticle status log only runs when Crossref is
    called.

    @param resource: Article, Container or TranslatedArticle to register
    @param testing: Boolean set to True when testing (Crossref is not called)
    @return: data {'status': 200 if Crossref answered 200 with a SUCCESS title, else 404,
                   'message': first 1000 bytes of the Crossref answer (only when Crossref is called)}
    """
    doibatch = get_doibatch(resource)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=resource, status="En cours")
    doibatch.save()

    # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#timestamp
    # "%Y%m%d%H%M%S%f" produces 20 characters; only the first 19 are sent
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    context = {
        "doi_batch_id": f"{doibatch.pk:04d}",
        "timestamp": timestamp[0:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = f"crossref/{resource.classname.lower()}_doi_register.xml"
    full_date_format = "<month>%m</month><day>%d</day><year>%Y</year>"

    # hack to work out the publication date of a resource
    if resource.classname == "Article":
        # an article without contributors is registered as a posted-content of type other
        # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#posted_content
        if not resource.get_author_contributions():
            template = "crossref/posted-content.xml"

        if resource.date_published:
            # date formatted the way CROSSREF expects it
            resource.DOIdate = resource.date_published.strftime(full_date_format)

            # the container year is checked as well (a range keeps its 2nd year)
            container = resource.my_container
            year = _range_end_year(container.year)
            if year != container.year:
                container.year = year
        elif resource.date_online_first:
            # Online First
            # TODO: Is it possible to send 2 dates to Crossref ?
            # You can send multiple <publication_date> but it is for multiple media_type (print vs online)
            resource.DOIdate = resource.date_online_first.strftime(full_date_format)
            # The container year is '0' in this case
        else:
            # no date on the article: extrapolate from the volume year
            container = resource.my_container
            year = _range_end_year(container.year)
            resource.DOIdate = "<year>%s</year>" % year
            if year != container.year:
                container.year = year

    elif resource.classname == "Container":
        if resource.ctype.startswith("book"):
            # NB: book chapters are not handled here, everything is done in the
            # template when the book is registered; the template depends on the ctype
            if resource.my_collection.issn or resource.my_collection.e_issn:
                template = "crossref/book_series_metadata.xml"
            else:
                template = "crossref/book_set_metadata.xml"
            # else: standalone book that does not belong to a series
            #     template = book_metadata
            context["book_type"] = resource.ctype[5:].replace("-", "_")
            for bookpart in resource.article_set.all():
                # A dedicated name is used so that ``doibatch`` keeps pointing at
                # the batch whose pk was sent as doi_batch_id: the XML and any
                # error status below must be stored on that batch.
                part_batch = get_doibatch(bookpart)
                if part_batch:
                    part_batch.delete()
                part_batch = DOIBatch(resource=bookpart, status="En cours")
                part_batch.save()

        elif resource.ctype == "issue":
            # TODO
            template = "issue.xml"
            resource.DOIdate = "<year>%s</year>" % _range_end_year(resource.year)

    elif resource.classname == "TranslatedArticle":
        _log_record_doi(resource.doi)

        resource.DOIdate = resource.date_published.strftime(full_date_format)
        context["collection"] = resource.original_article.get_top_collection()

    context["resource"] = resource

    preprint_id = preprint_type = None
    extid = resource.extid_set.filter(id_type="preprint").first()
    if extid:
        preprint_id = extid.id_value
        preprint_type = find_id_type(preprint_id)
        # crossref allows "doi" and "arxiv", but not "hal"
        if preprint_type == "hal":
            preprint_type = "other"
    context["preprint_id"] = preprint_id
    context["preprint_type"] = preprint_type

    rdoi_extid = resource.extid_set.filter(id_type="rdoi").first()
    context["rdoi"] = rdoi_extid.id_value if rdoi_extid else None

    try:
        xml = render_to_string(template_name=template, context=context)
        doibatch.xml = xml
        doibatch.save()
    except Exception as e:
        if resource.classname == "TranslatedArticle":
            _log_record_doi(str(e))
        # bare raise keeps the original traceback
        raise

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        if resource.classname == "TranslatedArticle":
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.original_article)
            _log_record_doi("Call crossref")
        elif resource.classname == "Container" and resource.ctype.startswith("book"):
            # no DOI at container level: the Crossref credentials are taken from the 1st book part
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.article_set.first())
        else:
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource)

        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            titles = etree.XML(body).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # The 1000-byte cut may fall inside a multi-byte character: drop the
        # incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

        if resource.classname == "TranslatedArticle":
            _log_record_doi(doibatch.status)

    return data

269

270

def get_user_pwd_crossref(resource):
    """
    Get the CROSSREF credentials that match the DOI prefix of a resource.

    For a DOI such as "10.5802/alco.21" the settings CROSSREF_USER_5802 and
    CROSSREF_PWD_5802 are read. If either setting is missing for the prefix,
    both values fall back to the 5802 pair.

    @param resource: object whose ``doi`` looks like "<registrant prefix>/<suffix>"
    @return: tuple (crossref_user, crossref_pwd)
    """
    prefix = resource.doi.split("/")[0]
    md_prefix = prefix.split(".")[1]
    try:
        crossref_user = getattr(settings, "CROSSREF_USER_" + md_prefix)
        crossref_pwd = getattr(settings, "CROSSREF_PWD_" + md_prefix)
    except AttributeError:
        # unknown prefix: use the default pair (never mix two accounts)
        crossref_user = settings.CROSSREF_USER_5802
        crossref_pwd = settings.CROSSREF_PWD_5802
    return crossref_user, crossref_pwd

285

286

def checkDOIBatch(doibatch):
    """
    check DOI batch status by HTTP request

    The URL is built from settings.CROSSREF_BASE_CHECKBATCH_URL_TPL, the
    Crossref credentials of the batch resource and the batch pk. The
    <doi_batch_diagnostic> answer is then translated into the status/log of
    the batch, which is saved.

    @param doibatch: DOIBatch
    @return: DOIBatch with status and log updated
    """
    resource = doibatch.resource
    crossref_user, crossref_pwd = get_user_pwd_crossref(resource)
    url = settings.CROSSREF_BASE_CHECKBATCH_URL_TPL % (crossref_user, crossref_pwd)
    url = url.format(doibatch.pk)
    r = requests.get(url)

    if r.status_code != 200:
        doibatch.status = "Erreur"
        doibatch.log = r.text
        doibatch.save()
        return doibatch

    # parse the returned xml
    tree = etree.XML(r.text.encode("utf8"))
    batch_status = tree.xpath("/doi_batch_diagnostic")[0].attrib["status"]

    if batch_status == "completed":
        # the batch has been processed
        doibatch.status = "batch terminé"
        doibatch.log = "Pas de DOI associé dans le batch : voir le xml"
        for diag in tree.xpath("//*/record_diagnostic"):
            doi = diag.xpath("doi")[0].text
            log = diag.xpath("msg")[0].text
            status = diag.attrib["status"]
            if doi == resource.doi and status == "Success":
                doibatch.status = "Enregistré"
            else:
                # either the record failed or it concerns another DOI
                doibatch.status = "Erreur"
            # NOTE(review): nesting reconstructed from a listing without
            # indentation; the log is assumed to be set for every diagnostic.
            doibatch.log = log
    elif batch_status in ("in_process", "queued"):
        doibatch.status = "En cours"
        doibatch.log = "batch en cours de traitement"
    else:
        # refreshed too early after Record DOI
        doibatch.status = "Erreur"
        doibatch.log = (
            f"Attention, il se peut qu'il faille rafraichir un peu plus tard {r.text} "
        )

    doibatch.save()
    return doibatch

337

338

def removeOldDataInCrossref(article, testing=False):
    """
    The CRAS 2002-2019 articles were registered by Elsevier
    To remove some metadata in Crossref, we need to provide a separate XML with the fields to remove

    A fresh DOIBatch ("En cours") replaces any existing one, the
    crossref/article_remove_old_data.xml template is rendered and stored on
    it, and, unless ``testing`` is set, the XML is posted to Crossref.

    @param article:
    @param testing: Boolean set to True when testing (the XML is printed, Crossref is not called)
    @return: data {'status': 200 if Crossref answered 200 with a SUCCESS title, else 404,
                   'message': first 1000 bytes of the Crossref answer (only when Crossref is called)}
    """
    doibatch = get_doibatch(article)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=article, status="En cours")
    doibatch.save()

    # "%Y%m%d%H%M%S%f" produces 20 characters; only the first 19 are sent
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    context = {
        "resource": article,
        "doi_batch_id": f"{doibatch.pk:04d}",
        "timestamp": timestamp[0:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = "crossref/article_remove_old_data.xml"

    if article.date_published:
        article.DOIdate = article.date_published.strftime(
            "<month>%m</month><day>%d</day><year>%Y</year>"
        )

    # rendering errors propagate to the caller unchanged
    xml = render_to_string(template_name=template, context=context)
    if testing:
        print(xml)
    doibatch.xml = xml
    doibatch.save()

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        crossref_user, crossref_pwd = get_user_pwd_crossref(article)
        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            titles = etree.XML(body).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # NOTE(review): nesting reconstructed; the message is assumed to be set
        # for every answer. The 1000-byte cut may fall inside a multi-byte
        # character: drop the incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

    return data

Coverage for sites/ptf_tools/ptf_tools/doi.py: 19%

245 statements