Coverage for apps/ptf/cmds/ptf_cmds/base_ptf_cmds.py: 70%
859 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
1import copy
2import datetime
3import json
4import os
5import subprocess
6import sys
8import lxml.etree as etree
9from PIL import Image
11from django.conf import settings
12from django.core.exceptions import ObjectDoesNotExist
13from django.template.loader import render_to_string
15from ptf import exceptions
16from ptf import model_helpers
17from ptf import utils
18from ptf.cmds.database_cmds import add_contributors
19from ptf.cmds.database_cmds import addArticleDatabaseCmd
20from ptf.cmds.database_cmds import addBibItemDatabaseCmd
21from ptf.cmds.database_cmds import addBibItemIdDatabaseCmd
22from ptf.cmds.database_cmds import addCollectionDatabaseCmd
23from ptf.cmds.database_cmds import addContainerDatabaseCmd
24from ptf.cmds.database_cmds import addDataStreamDatabaseCmd
25from ptf.cmds.database_cmds import addExtIdDatabaseCmd
26from ptf.cmds.database_cmds import addExtLinkDatabaseCmd
27from ptf.cmds.database_cmds import addFrontMatterDatabaseCmd
28from ptf.cmds.database_cmds import addProviderDatabaseCmd
29from ptf.cmds.database_cmds import addPublisherDatabaseCmd
30from ptf.cmds.database_cmds import addRelatedObjectDatabaseCmd
31from ptf.cmds.database_cmds import addRelationshipDatabaseCmd
32from ptf.cmds.database_cmds import addResourceCountDatabaseCmd
33from ptf.cmds.database_cmds import addSiteDatabaseCmd
34from ptf.cmds.database_cmds import addSupplementaryMaterialDatabaseCmd
35from ptf.cmds.database_cmds import addXmlBaseDatabaseCmd
36from ptf.cmds.database_cmds import baseCmd
37from ptf.cmds.database_cmds import publishArticleDatabaseCmd
38from ptf.cmds.database_cmds import publishContainerDatabaseCmd
39from ptf.cmds.database_cmds import updateCollectionDatabaseCmd
40from ptf.cmds.database_cmds import updateExtLinkDatabaseCmd
41from ptf.cmds.database_cmds import updateResourceIdDatabaseCmd
42from ptf.cmds.solr_cmds import addArticleSolrCmd
43from ptf.cmds.solr_cmds import addBookPartSolrCmd
44from ptf.cmds.solr_cmds import addContainerSolrCmd
45from ptf.cmds.solr_cmds import solrFactory
46from ptf.cmds.solr_cmds import updateResourceSolrCmd
47from ptf.cmds.xml import xml_utils
48from ptf.display import resolver
49from ptf.model_data import PublisherData
50from ptf.models import ExtLink
51from ptf.models import Person
52from ptf.models import Relationship
def myconverter(o):
    """Convert a ``datetime.datetime`` into its string representation.

    NOTE(review): presumably meant as a ``json.dumps(default=...)`` hook;
    no caller is visible here, so confirm before relying on it.

    :param o: any object
    :return: ``str(o)`` when ``o`` is a ``datetime.datetime`` instance,
        ``None`` for every other value
    """
    if isinstance(o, datetime.datetime):
        return str(o)
    # Any other type is deliberately mapped to None (unchanged historical
    # behaviour, now made explicit).
    return None
def do_solr_commit():
    """Delegate to ``solrFactory.do_solr_commit()``; nothing is returned."""
    solrFactory.do_solr_commit()
def do_solr_rollback():
    """Delegate to ``solrFactory.do_solr_rollback()``; nothing is returned."""
    solrFactory.do_solr_rollback()
68#####################################################################
69#
70# addPtfCmd: base class of PtfCmds
71#
72# PtfCmds may have a cmd and a sub-cmd
73# The cmd is executed, and the id of the returned object is passed
74# to the sub-cmd before its execution.
75#
76# This allows to store an object in Django, get the django object id,
77# then store the corresponding document in Solr
78#
79######################################################################
class addPtfCmd(baseCmd):
    """Base class of the PtfCmds: a main command plus an optional sub-command.

    ``cmd`` is executed first. The object it returns is handed to ``sub_cmd``
    (as ``db_obj``, ``id`` and ``pid``) before the sub-command is executed.
    Subclasses assign ``self.cmd`` (and possibly ``self.sub_cmd``) in their
    own ``__init__``.
    """

    def __init__(self, params=None):
        """Initialise the command.

        :param params: optional dict of parameters. When it contains a
            ``"solr_commit"`` key, its value is copied under ``"commit"``
            (the caller's dict is modified in place).
        """
        if params is not None and "solr_commit" in params:
            params["commit"] = params["solr_commit"]

        super().__init__(params)
        self.required_delete_params.append("object_to_be_deleted")

        self.cmd = None
        self.sub_cmd = None

    def internal_do(self):
        """Run ``cmd`` then ``sub_cmd`` and return the object produced.

        :return: the object returned by ``cmd.do()`` when a ``cmd`` is set,
            otherwise whatever the parent ``internal_do`` returned
        """
        obj = super().internal_do()

        if self.cmd:
            obj = self.cmd.do()

        if self.sub_cmd:
            # The sub-command works on the object created by the main command
            self.sub_cmd.db_obj = obj
            self.sub_cmd.id = obj.id
            self.sub_cmd.pid = obj.pid

            # if hasattr(obj, "title_tex"):
            #     self.sub_cmd.title = obj.title_tex

            self.sub_cmd.do()

        # Remember the object in case the command gets undone later
        self.set_object_to_be_deleted(obj)
        return obj

    def set_object_to_be_deleted(self, obj):
        """Record ``obj`` as the target of a future ``undo``.

        ``None`` is ignored. The object is stored on this command and, when a
        main command exists, on ``self.cmd`` as well.
        """
        if obj is None:
            return

        self.object_to_be_deleted = obj
        # Guard added: self.cmd stays None until a subclass assigns it, and
        # the unguarded attribute write raised AttributeError in that case.
        if self.cmd is not None:
            self.cmd.object_to_be_deleted = obj

    def internal_undo(self):
        """Undo ``cmd`` then ``sub_cmd`` and return the resulting id.

        :return: the value returned by ``cmd.undo()`` when a ``cmd`` is set,
            otherwise whatever the parent ``internal_undo`` returned
        """
        obj_id = super().internal_undo()

        if self.cmd:
            obj_id = self.cmd.undo()

        if self.sub_cmd:
            self.sub_cmd.id = obj_id
            self.sub_cmd.undo()

        return obj_id
130#####################################################################
131#
132# addSitePtfCmd: adds/remove a PtfSite
133# params: 'site_name', 'site_domain'
134#
135# Exception raised:
136# - ValueError if the init params are empty
137# - exceptions.ResourceExists during do if the site already exists
138# - exceptions.ResourceDoesNotExist during undo if the site does not exist
139# - RuntimeError during undo if resources are still published
140#
141######################################################################
class addSitePtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addSiteDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addSiteDatabaseCmd(params)
149#####################################################################
150#
151# addProviderPtfCmd: adds/remove a Provider
152# params: 'name', 'pid_type', 'sid_type'
153#
154# Exception raised:
155# - ValueError if the init params are empty
156# - exceptions.ResourceExists during do if the provider already exists
157# - exceptions.ResourceDoesNotExist during undo if the provider does not exist
158#
159######################################################################
class addProviderPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addProviderDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addProviderDatabaseCmd(params)
167#####################################################################
168#
169# addXmlBasePtfCmd: adds/remove an XmlBase
170# XmlBase is the root URL of an ExtLink (ex: http://archive.numdam.org/article)
171# params: 'base'
172#
173# Exception raised:
174# - ValueError if the init params are empty
175# - exceptions.ResourceExists during do if the XmlBase already exists
176# - exceptions.ResourceDoesNotExist during undo if the XmlBase does not exist
177# - RuntimeError during undo if related extlinks or objects still exist
178#
179######################################################################
class addXmlBasePtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addXmlBaseDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addXmlBaseDatabaseCmd(params)
187#####################################################################
188#
189# addExtLinkPtfCmd: adds/remove an ExtLink
190# params: 'rel': 'website' or 'small_icon'
191# 'mimetype', 'location', 'metadata', 'seq'
192#
193# Needs a Resource object (required) and a XmlBase object (option)
194#
195# Exception raised:
196# - ValueError if the init params are empty
197# - exceptions.ResourceExists during do if the ExtLink already exists
198# - exceptions.ResourceDoesNotExist during undo if the ExtLink does not exist
199# - RuntimeError during undo if resources are still published
200#
201######################################################################
class addExtLinkPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addExtLinkDatabaseCmd``.

    When the location is a full ``file:`` path and a ``to_folder`` is set,
    ``pre_do`` copies (or converts) the file into the PTF tree and rewrites
    the location accordingly.
    """

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addExtLinkDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)

    def set_base(self, base):
        """Forward the base to the database command."""
        self.cmd.set_base(base)

    def pre_do(self):
        """Import the linked file into ``to_folder`` before the object is stored.

        Only applies when ``to_folder`` is set and the location starts with
        ``file:`` (import with a full file path, e.g. Elsevier CRAS):

        1. the file is copied; ``.tif``/``.tiff`` files are saved as JPEG
        2. ``location`` is updated (on this command and on ``self.cmd``) to
           the path inside the PTF tree

        This is done in ``pre_do`` so the stored object carries the final
        location.
        """
        super().pre_do()

        prefix = "file:"
        if not (self.to_folder and self.location.startswith(prefix)):
            return

        from_path = self.location[len(prefix):]

        extension = os.path.splitext(self.location)[1]
        convert_image = extension in (".tif", ".tiff")
        if convert_image:
            extension = ".jpg"

        resource = self.cmd.resource
        relative_path = resource.pid + extension
        new_location = os.path.join(resource.get_relative_folder(), relative_path)
        to_path = os.path.join(self.to_folder, new_location)

        os.makedirs(os.path.dirname(to_path), exist_ok=True)

        if convert_image:
            # The context manager closes the source file once the JPEG is
            # written; the image handle was previously left open.
            with Image.open(from_path) as im:
                # NOTE(review): thumbnail() to the image's own size looks
                # like a no-op; kept as-is, confirm before removing.
                im.thumbnail(im.size)
                im.save(to_path, "JPEG", quality=100)
        else:
            resolver.copy_file(from_path, to_path)

        self.location = new_location
        self.cmd.location = new_location
249#####################################################################
250#
251# addExtIdPtfCmd: adds/remove an ExtId
252# params: 'id_type', 'id_value'
253#
254# Needs a Resource object
255#
256# Exception raised:
257# - ValueError if the init params are empty
258# - exceptions.ResourceExists during do if the ExtId already exists
259# - exceptions.ResourceDoesNotExist during undo if the ExtId does not exist
260# - RuntimeError during undo if resources are still published
261#
262######################################################################
class addExtIdPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addExtIdDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addExtIdDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
273#####################################################################
274#
275# addRelatedObjectPtfCmd: adds/remove a RelatedObject
276# params: 'rel':
277# 'mimetype', 'location', 'metadata', 'seq'
278#
279# Needs a Resource object and a XmlBase object
280#
281# Exception raised:
282# - ValueError if the init params are empty
283# - exceptions.ResourceExists during do if the RelatedObject already exists
284# - exceptions.ResourceDoesNotExist during undo if the RelatedObject does not exist
285# - RuntimeError during undo if resources are still published
286#
287######################################################################
class addRelatedObjectPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addRelatedObjectDatabaseCmd``.

    Besides storing the object, the command copies the related file into the
    PTF tree (``pre_do`` / ``post_do``) and removes it again on undo
    (``pre_undo``).
    """

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)
        # PDFs are linearized by default; subclasses may switch this off
        self.do_linearize = True

        # The Resource is needed to construct the complete path
        self.required_delete_params.append("resource")

        self.cmd = addRelatedObjectDatabaseCmd(params)

    def set_resource(self, resource):
        """Keep the resource on this command and forward it to ``self.cmd``."""
        self.resource = resource
        self.cmd.set_resource(resource)

    def set_base(self, base):
        """Forward the base to the database command."""
        self.cmd.set_base(base)

    def _get_relative_path(self, location, from_path, extension, full_path_pos, resource):
        """Return the file path relative to the resource folder.

        :param location: the original ``location`` value
        :param from_path: path the file is read from
        :param extension: target extension (already ``.jpg`` for converted images)
        :param full_path_pos: index of ``"file:"`` in ``location`` (-1 if absent)
        :param resource: resource owning the file (its ``pid`` names the file)
        """
        if full_path_pos > 0:
            # Whatever precedes "file:" is used as the relative path
            return location[:full_path_pos]

        attach_pos = location.find("/Attach/")
        if attach_pos > 0:
            # ".../Attach/xxx" becomes "attach/xxx"
            return "a" + location[attach_pos + 2:]
        if extension == ".tex":
            return os.path.join("src/tex", resource.pid + extension)
        if extension == ".jpg":
            basename = os.path.splitext(os.path.basename(from_path))[0]
            return os.path.join("src/tex/figures", basename + extension)
        if getattr(self, "supplementary_material", False):
            # Supplements from Elsevier. They are declared with "file://"
            # and need to be copied in attach/basename
            return "attach/" + os.path.basename(from_path)
        return resource.pid + extension

    def pre_do(self):
        """Copy the related file into ``to_folder`` and rewrite ``location``.

        Two situations trigger the copy:

        A. Import of a Cedrics XML (``from_folder`` is
           ``settings.CEDRAM_TEX_FOLDER``): the location fields are relative
           to ``from_folder`` (unlike a Cedrics import transformed to JATS,
           where they are more or less relative to ``to_folder``).
        B. Import with a full file path (``file:``), e.g. Elsevier CRAS.

        RelatedObject is used for the images of the articles (HTML); issue
        covers are ExtLinks (see ``addExtLinkPtfCmd``).

        1. the file is copied (PDFs may be linearized, Elsevier images are
           converted to JPEG)
        2. ``location`` is updated to use the PTF tree

        This is done in ``pre_do`` so the stored object carries the final
        location.
        """
        super().pre_do()

        full_path_pos = self.location.find("file:")
        has_full_path = full_path_pos != -1
        if not (
            (
                self.from_folder
                and self.to_folder
                and self.from_folder == settings.CEDRAM_TEX_FOLDER
            )
            or (self.to_folder and has_full_path)
        ):
            return

        location = self.location
        if has_full_path:
            from_path = location[full_path_pos + 5:].replace(
                "/ums_dev/numdam_dev", "/numdam_dev"
            )
        else:
            from_path = os.path.join(self.from_folder, location)

        extension = os.path.splitext(from_path)[1]
        resource = self.cmd.resource

        convert_image = (
            has_full_path and extension in xml_utils.get_elsevier_image_extensions()
        )
        if convert_image:
            extension = ".jpg"

        relative_path = self._get_relative_path(
            location, from_path, extension, full_path_pos, resource
        )

        new_location = os.path.join(resource.get_relative_folder(), relative_path)
        to_path = os.path.join(self.to_folder, new_location)

        os.makedirs(os.path.dirname(to_path), exist_ok=True)

        do_copy = True
        # linearize_pdf directly creates the to_path (ptf-tools only);
        # there is no need to copy the file in that case
        if extension.lower() == ".pdf" and self.do_linearize:
            do_copy = utils.linearize_pdf(from_path, to_path)

        if do_copy:
            if convert_image:
                # The context manager closes the source file once the JPEG is
                # written; the image handle was previously left open.
                with Image.open(from_path) as im:
                    im.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
                    im.save(to_path, "JPEG", quality=90)
            else:
                resolver.copy_file(from_path, to_path)

        self.location = new_location
        self.cmd.location = new_location

    def post_do(self, obj):
        """Copy the binary files when importing from the mathdoc archive.

        In every other case the files were already copied in ``pre_do``.
        """
        super().post_do(obj)
        if self.from_folder == settings.MATHDOC_ARCHIVE_FOLDER and self.to_folder:
            # copy_binary_files is used because it also copies the ExtLinks
            # (icon, small-icon), which would otherwise not be copied.
            # Alternatives considered: put the icon in DataStream, or create
            # an addBinaryFiles class that every object with files depends on.
            # Covers are not in the cedram XML, so the question does not
            # arise in that case.
            resolver.copy_binary_files(obj.resource, self.from_folder, self.to_folder)

    def pre_undo(self):
        """Delete the copied file from ``to_folder`` before the object is removed."""
        super().pre_undo()
        if self.to_folder:
            path = os.path.join(self.to_folder, self.object_to_be_deleted.location)
            resolver.delete_file(path=path)
395#####################################################################
396#
397# addSupplementaryMaterialPtfCmd: adds/remove a Supplementary Material
398# params: 'rel':
399# 'mimetype', 'location', 'metadata', 'seq', 'caption'
400#
401# Needs a Resource object and a XmlBase object
402#
403# Exception raised:
404# - ValueError if the init params are empty
405# - exceptions.ResourceExists during do if the RelatedObject already exists
406# - exceptions.ResourceDoesNotExist during undo if the RelatedObject does not exist
407# - RuntimeError during undo if resources are still published
408#
409######################################################################
class addSupplementaryMaterialPtfCmd(addRelatedObjectPtfCmd):
    """RelatedObject PtfCmd backed by an ``addSupplementaryMaterialDatabaseCmd``."""

    def __init__(self, params=None):
        """Replace the parent's database command and disable PDF linearization."""
        super().__init__(params)
        self.do_linearize = False
        self.cmd = addSupplementaryMaterialDatabaseCmd(params)
417#####################################################################
418#
419# addDataStreamPtfCmd: adds/remove a DataStream
420# params: 'rel':
421# 'mimetype', 'location', 'metadata', 'seq'
422#
423# Needs a Resource object and a XmlBase object
424#
425# Exception raised:
426# - ValueError if the init params are empty
427# - exceptions.ResourceExists during do if the DataStream already exists
428# - exceptions.ResourceDoesNotExist during undo if the DataStream does not exist
429# - RuntimeError during undo if resources are still published
430#
431######################################################################
class addDataStreamPtfCmd(addRelatedObjectPtfCmd):
    """RelatedObject PtfCmd backed by an ``addDataStreamDatabaseCmd``."""

    def __init__(self, params=None):
        """Replace the parent's database command with the DataStream one."""
        super().__init__(params)
        self.cmd = addDataStreamDatabaseCmd(params)
438# #####################################################################
439# #
440# # addOrUpdateDataStreamPtfCmd: adds or Update a Datastream
441# # params: 'rel':
442# # 'mimetype', 'location', 'metadata', 'seq'
443# #
444# # if new location specify params: 'new_location'
445# # Needs a Resource object and a XmlBase object
446# #
447# # Exception raised:
448# # - ValueError if the init params are empty
449# # - RuntimeError during undo if resources are still published
450# #
451# ######################################################################
452# class addOrUpdateDataStreamPtfCmd(baseCmd):
453# def set_resource(self, resource):
454# self.resource = resource
455#
456# def internal_do(self):
457# super(addOrUpdateDataStreamPtfCmd, self).internal_do()
458# # copy new article pdf cedram_dev to mersenne_test_data
459# datastream_qs = DataStream.objects.filter(resource=self.resource,
460# base=self.base,
461# rel=self.rel,
462# location=self.location)
463#
464# cmd = addDataStreamPtfCmd({'rel':self.rel,
465# 'mimetype':self.mimetype,
466# 'location':self.location,
467# 'text':self.text,
468# 'seq':self.seq
469# })
470# cmd.set_base(self.base)
471# cmd.set_resource(self.resource)
472#
474# if datastream_qs.count() > 0:
475# cmd.set_object_to_be_deleted(datastream_qs.get())
476# cmd.undo()
477# cmd.set_params({'location': self.new_location})
478# cmd.do()
481#####################################################################
482#
483# addResourceCountPtfCmd: adds/remove a ResourceCount
484#
485# A ResourceCount is a generic count element.
486# Exemple: page count, table count, image count...
487#
488# params: 'name', 'value', 'seq'
489#
490# Needs a Resource object
491#
492# Exception raised:
493# - ValueError if the init params are empty
494# - exceptions.ResourceExists during do if the ResourceCount already exists
495# - exceptions.ResourceDoesNotExist during undo if the ResourceCount does not exist
496# - RuntimeError during undo if resources are still published
497#
498######################################################################
class addResourceCountPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addResourceCountDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addResourceCountDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
509#####################################################################
510#
511# addBibItemPtfCmd: adds/remove a BibItem
512#
513# No verification is done to check if a BibItem already exists
514# Rationale: BibItems are only added in a loop within an article.
515# The check is actually the existence of the article.
516#
517# Exception raised:
518# - ValueError if the init params are empty
519# - exceptions.ResourceDoesNotExist during undo if the BibItem does not exist
520# - RuntimeError during undo if resources are still published
521#
522######################################################################
class addBibItemPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addBibItemDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addBibItemDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
533#####################################################################
534#
535# addBibItemIdPtfCmd: adds/remove a BibItemId
536#
537# No verification is done to check if a BibItemId already exists
538# Rationale: BibItems are only added inside an article/book
539# The check is actually the existence of the resource.
540#
541# Exception raised:
542# - ValueError if the init params are empty
543# - exceptions.ResourceDoesNotExist during undo if the BibItemId does not exist
544# - RuntimeError during undo if resources are still published
545#
546######################################################################
class addBibItemIdPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addBibItemIdDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addBibItemIdDatabaseCmd(params)

    def set_bibitem(self, bibitem):
        """Forward the bibitem to the database command."""
        self.cmd.set_bibitem(bibitem)
557#####################################################################
558#
559# addFrontMatterPtfCmd: adds/remove a FrontMatter
560#
561# No verification is done to check if a FrontMatter already exists
562# Rationale: FrontMatters are only added inside a book
563# The check is actually the existence of the book.
564#
565# Exception raised:
566# - ValueError if the init params are empty
567# - exceptions.ResourceDoesNotExist during undo if the FrontMatter does not exist
568# - RuntimeError during undo if resources are still published
569#
570######################################################################
class addFrontMatterPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addFrontMatterDatabaseCmd``."""

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addFrontMatterDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
581#####################################################################
582#
583# addRelationshipPtfCmd: adds/remove a Relationship
584#
585# Relationship relates 2 resources (ex: articles) with a relation. ex "follows", "followed-by"
586#
587# RelationName are created with a fixture (see app/ptf/apps/ptf/fixtures/initial_data.json
588# Example { "left" : "follows", "right" : "followed-by" }
589# A related-article of an article has 1 relation name (ex "follows" or "followed-by")
590# You need to know if the relation was stored in the left or right attribute of a RelationName,
591# so that you can create/search the Relationship with the correct object/subject.
592# Ex: with A "follows" B, A is the subject and B the object because "follows" is a RelationName.left attribute
593# with A "followed-by" B, A is the object and B the subject because "followed-by" is a RelationName.right attribute
594# A Relationship relates 2 resources with a RelationName
595#
596# Exception raised:
597# - ValueError if the init params are empty
598# - exceptions.ResourceExists during do if the Relationship already exists
599# - exceptions.ResourceDoesNotExist during undo if the Relationship does not exist
600# - RuntimeError during undo if resources are still published
601#
602######################################################################
class addRelationshipPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addRelationshipDatabaseCmd``.

    The three setters only forward their argument to the database command.
    """

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addRelationshipDatabaseCmd(params)

    def set_subject_resource(self, resource):
        """Forward the subject resource to the database command."""
        self.cmd.set_subject_resource(resource)

    def set_object_resource(self, resource):
        """Forward the object resource to the database command."""
        self.cmd.set_object_resource(resource)

    def set_relationname(self, relationname):
        """Forward the relation name to the database command."""
        self.cmd.set_relationname(relationname)
619#####################################################################
620#
621# addPublisherPtfCmd: adds/remove a publisher
622# params: 'name', 'location'
623#
624# Exception raised:
625# - ValueError if the init params are empty
626# - exceptions.ResourceExists during do if the Publisher already exists
627# - exceptions.ResourceDoesNotExist during undo if the Publisher does not exist
628#
629######################################################################
class addPublisherPtfCmd(addPtfCmd):
    """PtfCmd whose main command is an ``addPublisherDatabaseCmd``.

    No sub-command is attached: the Solr counterpart is commented out.
    """

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addPublisherDatabaseCmd(params)
        # self.sub_cmd = addPublisherSolrCmd(params)
638#####################################################################
639#
640# addResourcePtfCmd: adds/remove folder for a Resource
641#
642#
643# is responsible for the creation/deletion of resource folders
644######################################################################
class addResourcePtfCmd(addPtfCmd):
    """PtfCmd base class that removes the resource folder on undo."""

    def post_do(self, obj):
        """Run the parent hook only.

        Binary files (PDF, images, TeX, Attach) are copied in
        ``addRelatedObjectPtfCmd.pre_do``; the copy of the HTML images that
        used to live here is disabled (kept below for reference).
        """
        super().post_do(obj)
        # if self.from_folder and self.to_folder:
        #     resolver.copy_html_images(obj, from_folder=self.from_folder, to_folder=self.to_folder)

    def pre_undo(self):
        """Delete the folder of the object about to be removed.

        Nothing is deleted unless both ``object_to_be_deleted`` and
        ``to_folder`` are set.
        """
        super().pre_undo()

        if not (self.object_to_be_deleted and self.to_folder):
            return

        relative_folder = self.object_to_be_deleted.get_relative_folder()
        resolver.delete_object_folder(
            object_folder=relative_folder,
            to_folder=self.to_folder,
        )
662#####################################################################
663#
664# addCollectionPtfCmd: adds/remove a journal
665# a Collection needs a Provider object
666#
667# params: 'coltype', 'title_xml', 'wall',
668# 'pid', 'sid',
669# 'title_tex', 'title_html',
670# 'other_ids' Ex. [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
671#
672# Exception raised:
673# - ValueError if the init params are empty
674# - exceptions.ResourceExists during do if the Collection already exists
675# - exceptions.ResourceDoesNotExist during undo if the Collection does not exist
676#
677######################################################################
class addCollectionPtfCmd(addResourcePtfCmd):
    """Resource PtfCmd whose main command is an ``addCollectionDatabaseCmd``.

    No sub-command is attached: the Solr counterpart is commented out.
    """

    def __init__(self, params=None):
        """Build the database command from the same ``params``."""
        super().__init__(params)

        self.cmd = addCollectionDatabaseCmd(params)

        # self.sub_cmd = addCollectionSolrCmd(params)

    def set_provider(self, provider):
        """Forward the provider to the database command."""
        self.cmd.set_provider(provider)

    def set_parent(self, parent):
        """Forward the parent to the database command."""
        self.cmd.set_parent(parent)
693#####################################################################
694#
695# addContainerPtfCmd: adds/remove an issue
696# a Container needs a Collection (journal, book-series) that needs a Provider object
697#
698# params: 'year', 'vseries', 'volume', 'number'
699# 'doi','seq',
700#
701# (params common to Container/Article)
702# 'title_xml', 'title_tex', 'title_html', 'lang',
703# 'other_ids' Ex: [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
704# 'abstracts' Ex: [ { 'tag': tag, 'lang': lang, 'value': value } ]
705# 'contributors' Ex: [ { 'first_name': 'John', "corresponding": True...}, ... ]
706# 'kwd_groups' Ex1: [ { 'content_type': content_type, 'lang': lang, 'value': value } ]
707# Ex2: # [ { 'content_type': content_type, 'lang': lang,
708# 'kwds': [ value1, value2,... ] } ]
709#
710# Exception raised:
711# - ValueError if the init params are empty
712# - exceptions.ResourceExists during do if the issue already exists
713# - exceptions.ResourceDoesNotExist during undo if the Container does not exist
714#
715######################################################################
class addContainerPtfCmd(addResourcePtfCmd):
    """Resource PtfCmd whose main command is an ``addContainerDatabaseCmd``.

    A Solr sub-command (``addContainerSolrCmd``) is attached only when the
    ``xobj`` ctype starts with ``"book"`` or equals ``"lecture-notes"``.
    On undo, the articles of the container are also removed from Solr.
    """

    def __init__(self, params=None):
        """Build the database command and, for books, the Solr sub-command."""
        super().__init__(params)
        self.required_params.append("xobj")

        self.cmd = addContainerDatabaseCmd(params)
        if hasattr(self, "xobj") and (
            self.xobj.ctype.startswith("book") or self.xobj.ctype == "lecture-notes"
        ):
            self.sub_cmd = addContainerSolrCmd(params)

        # ids of the container's articles, collected in pre_undo and
        # consumed in internal_undo
        self.article_ids = []

    def add_collection(self, collection):
        """Forward the collection to the database command and the sub-command."""
        self.cmd.add_collection(collection)
        if self.sub_cmd:
            self.sub_cmd.add_collection(collection)

    def set_publisher(self, publisher):
        """Do nothing; kept so callers can still call it."""
        pass

        # self.sub_cmd.publisher_id = publisher.id

    def set_provider(self, provider):
        """Forward the provider to the database command."""
        self.cmd.set_provider(provider)

    @staticmethod
    def _detach_relationships(article):
        """Detach ``article`` from its Relationships instead of losing them.

        For each Relationship where ``article`` is on one side: delete it if
        the other side is already ``None``, otherwise set the article's side
        to ``None`` and save.
        """
        for own_side, other_side in (("resource", "related"), ("related", "resource")):
            for relationship in Relationship.objects.filter(**{own_side: article}):
                if getattr(relationship, other_side) is None:
                    relationship.delete()
                else:
                    setattr(relationship, own_side, None)
                    relationship.save()

    def pre_undo(self):
        """Collect the article ids and detach the article Relationships.

        To delete a container directly
        (``cmd = addContainerPtfCmd({'pid': pid, 'ctype': ctype}); cmd.undo()``)
        you simply need to pass its pid AND ctype. addContainerPtfCmd is then
        responsible for removing the issue and its articles from the system.

        Django automatically removes all objects related to the container
        (cascade), but the articles must be removed manually from Solr: their
        ids are stored here and the Solr documents are deleted in
        ``internal_undo``.

        addResourcePtfCmd is responsible for removing the articles' binary
        files from the system.
        """
        super().pre_undo()
        if not self.object_to_be_deleted:
            return

        for article in self.object_to_be_deleted.article_set.all():
            self.article_ids.append(article.id)

            # Exception to the Django cascade mechanism: Relationship.
            # A Relationship links 2 articles. If an article is removed,
            # Django automatically deletes the Relationship. That is not
            # wanted: the relationship must remain, with the article field
            # set to None.
            self._detach_relationships(article)

    def internal_undo(self):
        """Remove the collected articles from Solr, then undo the container.

        :return: the value returned by the parent ``internal_undo``
        """
        for article_id in self.article_ids:
            solr_cmd = addArticleSolrCmd({"id": article_id, "solr_commit": False})
            solr_cmd.undo()

        return super().internal_undo()

    def post_undo(self):
        """Run the parent hook, then call ``Person.objects.clean()``."""
        super().post_undo()

        Person.objects.clean()
793#####################################################################
794#
795# addArticlePtfCmd: adds/remove an article
796# an Article needs a Container that needs a Collection (Journal) that needs a Provider object
797#
798# params: fpage, lpage, doi, seq, atype (article type), page_range, elocation, article_number, talk_number
799#
800# pseq (parent seq)
801# related_article ?
802#
803# (params common to Container/Article)
804# 'title_xml', 'title_tex', 'title_html', 'lang',
805# 'other_ids' Ex: [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
806# 'abstracts' Ex: [ { 'tag': tag, 'lang': lang, 'value': value } ]
807# 'contributors' Ex: [ { 'first_name': 'John', "corresponding": True...}, ... ]
808# 'kwd_groups' Ex1: [ { 'content_type': content_type, 'lang': lang, 'value': value } ]
809# Ex2: # [ { 'content_type': content_type, 'lang': lang,
810# 'kwds': [ value1, value2,... ] } ]
813#
814# Exception raised:
815# - ValueError if the init params are empty
816# - exceptions.ResourceExists during do if the article already exists
817# - exceptions.ResourceDoesNotExist during undo if the Article does not exist
818#
819######################################################################
class addArticlePtfCmd(addResourcePtfCmd):
    """Add (do) / remove (undo) an Article.

    ``self.cmd`` is the database command (addArticleDatabaseCmd) and
    ``self.sub_cmd`` is the search engine command (addArticleSolrCmd).
    """

    def __init__(self, params=None):
        super().__init__(params)
        self.cmd = addArticleDatabaseCmd(params)
        # NOTE: a disabled (commented-out) variant only created the Solr command when the
        # site was a "cr" site or the issue was not the "to appear" issue.
        # The Solr command is now always created.
        self.sub_cmd = addArticleSolrCmd(params)

    def set_container(self, container):
        """Forward the container to the database command and to the Solr command (if any)."""
        self.cmd.set_container(container)
        if not self.sub_cmd:
            return
        self.sub_cmd.set_container(container)

    def set_provider(self, provider):
        """Forward the provider to the database command only."""
        self.cmd.set_provider(provider)

    def set_eprint(self, eprint):
        """Forward the eprint to the Solr command only."""
        self.sub_cmd.set_eprint(eprint)

    def set_source(self, source):
        """Forward the source to the Solr command only."""
        self.sub_cmd.set_source(source)

    def set_thesis(self, thesis):
        """Forward the thesis to the Solr command only."""
        self.sub_cmd.set_thesis(thesis)

    def add_collection(self, collection):
        """Set the collection on the database command and add it to the Solr command (if any)."""
        self.cmd.set_collection(collection)

        if not self.sub_cmd:
            return
        self.sub_cmd.add_collection(collection)

    def post_do(self, article):
        """Run a Solr "add" command for each translation of the newly created article."""
        super().post_do(article)

        translation_pairs = zip(self.xobj.translations, self.cmd.translated_articles)
        for xtrans_article, trans_article in translation_pairs:
            # Work on a copy: the data sent to Solr is altered below.
            solr_xobj = copy.deepcopy(xtrans_article)
            solr_xobj.trans_title_tex = self.xobj.title_tex
            solr_xobj.trans_title_html = self.xobj.title_html

            # NOTE(review): the nesting of the abstracts loop under this test was
            # reconstructed from a flattened listing - confirm against the repository.
            if article.trans_lang == xtrans_article.lang:
                if article.trans_title_tex:
                    solr_xobj.title_tex = article.trans_title_tex
                    solr_xobj.title_html = article.trans_title_html
                for abstract in self.xobj.abstracts:
                    is_abstract = abstract["tag"] == "abstract"
                    if is_abstract and abstract["lang"] == xtrans_article.lang:
                        solr_xobj.abstracts = [abstract]

            solr_cmd = addArticleSolrCmd({"xobj": solr_xobj})
            solr_cmd.set_container(article.my_container)
            solr_cmd.add_collection(article.get_collection())
            solr_cmd.db_obj = trans_article
            solr_cmd.id = trans_article.id
            solr_cmd.pid = trans_article.pid
            solr_cmd.do()

    def pre_undo(self):
        """Detach the Relationship objects that reference the article to be deleted.

        A Relationship whose other end is already empty is deleted;
        otherwise only the end pointing to the article is reset to None.
        """
        super().pre_undo()

        for relationship in Relationship.objects.filter(resource=self.object_to_be_deleted):
            if relationship.related is None:
                relationship.delete()
            else:
                relationship.resource = None
                relationship.save()

        for relationship in Relationship.objects.filter(related=self.object_to_be_deleted):
            if relationship.resource is None:
                relationship.delete()
            else:
                relationship.related = None
                relationship.save()

    def internal_undo(self):
        """Undo the Solr commands of the article and of its translations, then call the parent undo.

        Returns the value returned by the parent internal_undo.
        """
        target = self.object_to_be_deleted
        if target:
            addArticleSolrCmd({"id": target.id, "solr_commit": False}).undo()

            for trans_article in target.translations.all():
                addArticleSolrCmd({"id": trans_article.id, "solr_commit": False}).undo()

        return super().internal_undo()
919#####################################################################
920#
921# addBookPartPtfCmd: adds/remove a book part
922#
923# TODO an Article is used to store a book part in the database. Why not use a JournalArticle in SolR ?
924#
925# params: 'year', 'fpage', 'lpage'
926# 'colid' Ex: [ 1,2 ]
927#
928# (params common to Book)
929# 'title_xml', 'title_tex', 'title_html', 'lang',
930# 'other_ids' Ex: [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
931# 'ext_ids' Ex: [ ('zbl-item-id','0216.23901'), ('mr-item-id', '289322') ]
932# 'abstracts' Ex: [ { 'tag': tag, 'lang': lang, 'value': value } ]
933# 'contributors' Ex: [ { 'first_name': 'John', "corresponding": True...}, ... ]
934# 'kwd_groups' Ex1: [ { 'content_type': content_type, 'lang': lang, 'value': value } ]
935# Ex2: # [ { 'content_type': content_type, 'lang': lang,
936# 'kwds': [ value1, value2,... ] } ]
937# 'bibitem' Ex: ["1) Name - Title", "2) Name2 - Title2" ]
938#
939# Exception raised:
940# - ValueError if the init params are empty
941# - exceptions.ResourceExists during do if the book part already exists
942# - exceptions.ResourceDoesNotExist during undo if the BookPart does not exist
943#
944######################################################################
class addBookPartPtfCmd(addResourcePtfCmd):
    """Add (do) / remove (undo) a book part.

    The book part is stored with addArticleDatabaseCmd in the database
    and indexed with addBookPartSolrCmd in the search engine.
    """

    def __init__(self, params=None):
        super().__init__(params)

        self.cmd = addArticleDatabaseCmd(params)
        self.sub_cmd = addBookPartSolrCmd(params)

    def set_container(self, container):
        """Forward the container to both the database and the Solr commands."""
        self.cmd.set_container(container)
        self.sub_cmd.set_container(container)

    # 'colid' is used to find the collection of a book part
    # TODO store the book_id as well ?

    def add_collection(self, collection):
        """Add the collection to the Solr command, except for the "MBK" collection."""
        # manage collection MBK : only index the other collection
        if collection.pid == "MBK":
            return
        self.sub_cmd.add_collection(collection)
964##########################################################################
965##########################################################################
966#
967# Update Commands
968#
969##########################################################################
970##########################################################################
973#####################################################################
974#
975# updateCollectionPtfCmd: updates a journal
976# a Collection needs a Provider object
977#
978# params: 'coltype', 'title_xml', 'wall',
979# 'pid', 'sid',
980# 'title_tex', 'title_html',
981# 'other_ids' Ex. [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
982#
983# Exception raised:
984# - ValueError if the init params are empty
985# - exceptions.ResourceDoesNotExist during do if the Collection does not exist
986#
987######################################################################
class updateCollectionPtfCmd(addPtfCmd):
    """Update an existing Collection in the database.

    Only a database command is created: the creation of the Solr sub command
    is disabled in this class.
    """

    def __init__(self, params=None):
        super().__init__(params)

        self.cmd = updateCollectionDatabaseCmd(params)
        # self.sub_cmd = addCollectionSolrCmd(params)

    def set_provider(self, provider):
        """Forward the provider to the database command."""
        self.cmd.set_provider(provider)

    def set_publisher(self, publisher):
        """Forward the publisher to the Solr sub command, when there is one.

        This class does not create a sub command (see __init__), so the call used to
        fail when dereferencing the missing/empty sub command. It is now a no-op
        unless a sub command has been set.
        """
        sub_cmd = getattr(self, "sub_cmd", None)
        if sub_cmd:
            sub_cmd.set_publisher(publisher)
1002#####################################################################
1003#
1004# updateResourceIdPtfCmd: updates an existing ResourceId
1005# params: 'id_type': 'doi', 'issn', 'e-issn'
1006# 'id_value'
1007#
1008# Needs a Resource object (required)
1009#
1010# Exception raised:
1011# - ValueError if the init params are empty
1012# - exceptions.ResourceDoesNotExist during do if the ResourceId does not exist
1013#
1014######################################################################
class updateResourceIdPtfCmd(addPtfCmd):
    """Update an existing ResourceId through updateResourceIdDatabaseCmd.

    params: see the keys listed in the header comment ('id_type', 'id_value').
    A Resource must be given with set_resource.
    """

    def __init__(self, params=None):
        # A `{}` default would be a single dict shared by every call:
        # create a fresh one when no params are given.
        if params is None:
            params = {}

        super().__init__(params)

        self.cmd = updateResourceIdDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
1025#####################################################################
1026#
1027# updateExtLinkPtfCmd: updates an existing ExtLink
1028# params: 'rel': 'website' or 'small_icon'
1029# 'mimetype', 'location', 'metadata', 'seq'
1030#
1031# Needs a Resource object (required)
1032# TODO: update the related XmlBase object
1033#
1034# Exception raised:
1035# - ValueError if the init params are empty
1036# - exceptions.ResourceDoesNotExist during do if the ExtLink does not exist
1037#
1038######################################################################
class updateExtLinkPtfCmd(addPtfCmd):
    """Update an existing ExtLink through updateExtLinkDatabaseCmd.

    A Resource must be given with set_resource.
    """

    def __init__(self, params=None):
        super().__init__(params)

        # Only a database command is needed to update an ExtLink.
        self.cmd = updateExtLinkDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
class importExtraDataPtfCmd(baseCmd):
    """
    Restore additional info, such as checked/false_positive attributes on extid/bibitemid

    The info is read from the JSON archive file located with
    resolver.get_archive_filename inside import_folder.

    results: articles are updated
    """

    def __init__(self, params=None):
        self.pid = None
        self.import_folder = None

        super().__init__(params)

        self.required_params.extend(["pid", "import_folder"])

    def copy_file(self, filename, resource, from_pid):
        """Copy an imported file next to the resource and return its new relative location.

        This possibly retrieves the image set via ptf-tools for the resource.
        The image has to be renamed because the convention is to name it <pid>.<EXT>.
        When an online first article is moved, from_pid can differ from resource.pid.

        The file is copied only if its basename is "<from_pid><extension>";
        otherwise filename is returned unchanged.
        """
        basename = os.path.basename(filename)
        extension = os.path.splitext(filename)[1]
        if basename != f"{from_pid}{extension}":
            return filename

        renamed_basename = f"{resource.pid}{extension}"
        source_path = os.path.join(self.import_folder, filename)
        relative_location = os.path.join(resource.get_relative_folder(), renamed_basename)
        target_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, relative_location)
        resolver.copy_file(source_path, target_path)
        return relative_location

    def import_article_extra_info(self, article, article_data):
        """Apply the extra info stored in article_data (a dict, or None) to the article."""
        if article_data is None:
            return

        for extid_data in article_data["extids"]:
            model_helpers.add_or_update_extid(
                article,
                extid_data["type"],
                extid_data["value"],
                extid_data["checked"],
                extid_data["false_positive"],
                False,
            )

        for ref_data in article_data["references"]:
            bibitem = model_helpers.get_bibitem_by_seq(article, ref_data["seq"])
            if not bibitem:
                continue
            for bibid_data in ref_data["bibids"]:
                model_helpers.add_or_update_bibitemid(
                    bibitem,
                    bibid_data["type"],
                    bibid_data["value"],
                    bibid_data["checked"],
                    bibid_data["false_positive"],
                    False,
                )

        # Dates stored as strings in the JSON file; each one is saved as soon as it is set.
        for date_field in ("date_published", "date_pre_published", "date_online_first"):
            if date_field in article_data:
                parsed_date = model_helpers.parse_date_str(article_data[date_field])
                setattr(article, date_field, parsed_date)
                article.save()

        if "deployed_date" in article_data:
            deployed_date = model_helpers.parse_date_str(article_data["deployed_date"])
            site = model_helpers.get_site_mersenne(article.get_top_collection().pid)
            article.deploy(site, deployed_date)

        if "icon" in article_data:
            icon_location = self.copy_file(article_data["icon"], article, article_data["pid"])
            icon_cmd = addorUpdateExtLinkPtfCmd({"rel": "icon", "location": icon_location})
            icon_cmd.set_resource(article)
            icon_cmd.do()

        for flag in ("show_body", "do_not_publish"):
            if flag in article_data:
                setattr(article, flag, article_data[flag])
                article.save()

        has_doi_status = (
            settings.SITE_NAME == "ptf_tools"
            and "doi_status" in article_data
            and article_data["doi_status"] != 0
        )
        # Restricted to the articles that keep the same pid
        if has_doi_status and article.pid == article_data["pid"]:
            from mersenne_tools.models import DOIBatch
            from ptf_tools.doi import get_doibatch

            # Create a DOIBatch only if the article does not have one yet.
            if not get_doibatch(article):
                DOIBatch(
                    resource=article,
                    status=article_data["doi_status"],
                    id=article_data["doibatch_id"],
                    xml=article_data["doibatch_xml"],
                    log="-- import --",
                ).save()

    def import_container_extra_info(self, container, data):
        """Apply the extra info stored in data to the container and to its articles."""
        site = model_helpers.get_site_mersenne(container.my_collection.pid)

        if "deployed_date" in data:
            container.deploy(site, model_helpers.parse_date_str(data["deployed_date"]))

        if "icon" in data:
            icon_location = self.copy_file(data["icon"], container, container.pid)
            icon_cmd = addorUpdateExtLinkPtfCmd({"rel": "icon", "location": icon_location})
            icon_cmd.set_resource(container)
            icon_cmd.do()

        for article_data in data["articles"]:
            # Look for the article by DOI first, then by pid.
            article = None
            if article_data["doi"]:
                article = model_helpers.get_article_by_doi(article_data["doi"])
            if not article:
                article = model_helpers.get_article(article_data["pid"])
            if article:
                self.import_article_extra_info(article, article_data)

    def internal_do(self):
        """Read the JSON file of the resource (if it exists) and import its content.

        Raises exceptions.ResourceDoesNotExist if no resource matches self.pid.
        """
        super().internal_do()

        resource = model_helpers.get_resource(self.pid)
        if not resource:
            raise exceptions.ResourceDoesNotExist(f"Resource {self.pid} does not exist")

        obj = resource.cast()
        classname = obj.classname.lower()
        article_pid = self.pid if classname == "article" else None

        container = obj.get_container()
        container_pid = container.pid
        collection = container.my_collection

        json_filename = resolver.get_archive_filename(
            self.import_folder, collection.pid, container_pid, "json", article_pid=article_pid
        )
        if not os.path.exists(json_filename):
            return

        with open(json_filename, encoding="utf-8") as json_file:
            data = json.load(json_file)

        # Dispatch on the class name: import_article_extra_info / import_container_extra_info
        importer = getattr(self, f"import_{classname}_extra_info", None)
        if callable(importer):
            importer(obj, data)
1216#####################################################################
1217#
1218# addDjvuPtfCmd: add a Djvu to an existing issue
1219# Used when an issue is sent to Numdam by ptf-tools
1220#
1221# Needs a Resource object (required)
1222#
1223# Exception raised:
1224# - ValueError if the init params are empty
1225#
1226######################################################################
class addDjvuPtfCmd(baseCmd):
    """Create the Djvu version of the PDF of a resource and attach it as a DataStream.

    A Resource must be given (set_resource or the 'resource' param).
    """

    def __init__(self, params=None):
        self.resource = None

        # A `{}` default would be a single dict shared by every call:
        # create a fresh one when no params are given.
        super().__init__({} if params is None else params)

        self.required_params.extend(["resource"])

    def set_resource(self, resource):
        self.resource = resource

    def convert_pdf_to_djvu(self):
        """Convert the PDF datastream of the resource with pdf2djvu.

        Nothing is done if the resource already has a Djvu datastream or has no PDF datastream.
        Raises subprocess.CalledProcessError if pdf2djvu exits with a non-zero status.
        """
        obj = self.resource.cast()
        if obj.datastream_set.filter(mimetype="image/x.djvu").count() != 0:
            return

        qs = obj.datastream_set.filter(mimetype="application/pdf")
        if qs.count() == 0:
            return

        datastream = qs.first()
        location = datastream.location.replace(".pdf", ".djvu")

        folder = settings.MERSENNE_PROD_DATA_FOLDER
        if (
            hasattr(settings, "NUMDAM_COLLECTIONS")
            and obj.my_container.my_collection.pid in settings.NUMDAM_COLLECTIONS
        ):
            folder = settings.MERSENNE_TEST_DATA_FOLDER

        # Create the djvu in MERSENNE_PROD_DATA_FOLDER (used to archive)
        djvu_filename = os.path.join(folder, location)

        if not os.path.isfile(djvu_filename):
            pdf_filename = os.path.join(folder, datastream.location)
            if not os.path.isfile(pdf_filename):
                pdf_filename = os.path.join(
                    settings.MERSENNE_TEST_DATA_FOLDER, datastream.location
                )

            # Pass an argument list (no shell): file names containing spaces or shell
            # metacharacters are handed to pdf2djvu verbatim instead of being
            # re-interpreted by a shell.
            subprocess.check_output(
                ["pdf2djvu", "--quiet", "--dpi", "600", "--output", djvu_filename, pdf_filename]
            )

            # Copy the new djvu in MERSENNE_TEST_DATA_FOLDER (used to deploy)
            djvu_filename_in_test = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, location)
            if djvu_filename_in_test != djvu_filename:
                resolver.copy_file(djvu_filename, djvu_filename_in_test)

        cmd = addDataStreamDatabaseCmd(
            {
                "rel": "full-text",
                "mimetype": "image/x.djvu",
                "location": location,
                "text": "Full (DJVU)",
                "seq": qs.count() + 1,
            }
        )
        cmd.set_resource(obj)
        cmd.do()

        # Solr is updated for objects without a ctype, for books and for lecture notes.
        if (
            not hasattr(obj, "ctype")
            or obj.ctype.startswith("book")
            or obj.ctype == "lecture-notes"
        ):
            self.update_solr(obj, location)

    def update_solr(self, resource, djvu_location):
        """Send the Djvu location of the resource to the search engine."""
        cmd = updateResourceSolrCmd({"djvu": djvu_location})
        cmd.set_resource(resource)
        cmd.do()

    # Convert the PDF in Djvu
    def internal_do(self):
        super().internal_do()

        self.convert_pdf_to_djvu()
1309#####################################################################
1310#
1311# addorUpdateContribsPtfCmd: update the list of contributions of a Resource
1312# Remove the existing contributions and replace with the new ones
1313#
1314# Needs a Resource object (required)
1315#
1316# Exception raised:
1317# - ValueError if the init params are empty
1318#
1319######################################################################
class addorUpdateContribsPtfCmd(baseCmd):
    """Replace the contributions of a Resource, in the database and in the search engine.

    params: 'contributors' (the new list of contributors).
    A Resource must be given (set_resource or the 'resource' param).
    """

    def __init__(self, params=None):
        self.resource = None
        self.contributors = []

        # A `{}` default would be a single dict shared by every call:
        # create a fresh one when no params are given.
        super().__init__({} if params is None else params)

        self.required_params.extend(["resource"])

    def set_resource(self, resource):
        self.resource = resource

    def internal_do(self):
        """Delete the existing contributions, add the new ones, then update Solr."""
        super().internal_do()

        self.resource.contributions.all().delete()
        add_contributors(self.contributors, self.resource)

        cmd = updateResourceSolrCmd({"contributors": self.contributors})
        cmd.set_resource(self.resource)
        cmd.do()
1343#####################################################################
1344#
1345# addorUpdateKwdsPtfCmd: update the keywords of a Resource
1346# Remove the existing keywords and replace with the new ones
1347#
1348# Needs a Resource object (required)
1349#
1350# TODO: pass a list of kwd_groups instead of separate kwd_<lang> values
1351#
1352# Exception raised:
1353# - ValueError if the init params are empty
1354#
1355######################################################################
1356# class addorUpdateKwdsPtfCmd(baseCmd):
1357# def __init__(self, params={}):
1358# self.resource = None
1359# self.kwds_fr = None
1360# self.kwds_en = None
1361# self.kwd_uns_fr = None
1362# self.kwd_uns_en = None
1363#
1364# super(addorUpdateKwdsPtfCmd, self).__init__(params)
1365#
1366# self.required_params.extend(['resource'])
1367#
1368# def set_resource(self, resource):
1369# self.resource = resource
1370#
1371# def addOrUpdateKwds(self, kwd_uns, kwds, lang):
1372# kwds_groups_qs = self.resource.kwdgroup_set.filter(content_type='', lang=lang)
1373# if kwds_groups_qs.exists():
1374# # There is already a kwd_group.
1375# group = kwds_groups_qs.first()
1376# # First, delete all its kwds
1377# group.kwd_set.all().delete()
1378# group.delete()
1379#
1380# new_kwd_group = None
1381#
1382# if kwd_uns or kwds:
1383# new_kwd_group = {'content_type': '', 'lang': lang, 'kwds': kwds}
1384# if kwd_uns:
1385# new_kwd_group['value_tex'] = kwd_uns
1386# new_kwd_group['value_html'] = kwd_uns
1387# new_kwd_group[
1388# 'value_xml'] = '<unstructured-kwd-group xml:space="preserve">' + kwd_uns + '</unstructured-kwd-group>'
1389# else:
1390# # Build value_tex and value_html for display and SolR
1391# # But do not create value_xml: it is done by the XML export templates (OAI, PubMed)
1392# value = ''
1393# for kwd in kwds:
1394# if value:
1395# value += ', '
1396# value += kwd
1397# new_kwd_group['value_tex'] = value
1398# new_kwd_group['value_html'] = value
1399#
1400# addKwdGroup(new_kwd_group, self.resource)
1401#
1402# return new_kwd_group
1403#
1404# def internal_do(self):
1405# super(addorUpdateKwdsPtfCmd, self).internal_do()
1406#
1407# kwd_groups = []
1408# kwd_group = self.addOrUpdateKwds(self.kwd_uns_fr, self.kwds_fr, 'fr')
1409# if kwd_group:
1410# kwd_groups.append(kwd_group)
1411#
1412# kwd_group = self.addOrUpdateKwds(self.kwd_uns_en, self.kwds_en, 'en')
1413# if kwd_group:
1414# kwd_groups.append(kwd_group)
1415#
1416# cmd = updateResourceSolrCmd({'kwd_groups': kwd_groups})
1417# cmd.set_resource(self.resource)
1418# cmd.do()
1421#####################################################################
1422#
1423# addorUpdateExtLinkPtfCmd: add or update the ExtLink of a Resource for a given 'rel'
1424# Update the location of the existing ExtLink (delete it if no location is given), or create it if it does not exist
1425#
1426# Needs a Resource object (required)
1427#
1428# Exception raised:
1429# - ValueError if the init params are empty
1430#
1431# TODO: the cover images (the icons) are stored here and therefore do not DIRECTLY benefit from the file copy logic of the RelatedObjects
1432######################################################################
class addorUpdateExtLinkPtfCmd(baseCmd):
    """Add, update or delete the ExtLink of a Resource for a given 'rel'.

    params: 'rel' (required), 'location', 'mimetype'.
    A Resource must be given (set_resource or the 'resource' param).
    """

    def __init__(self, params=None):
        self.resource = None
        self.location = None
        self.rel = None
        self.mimetype = ""

        # A `{}` default would be a single dict shared by every call:
        # create a fresh one when no params are given.
        super().__init__({} if params is None else params)

        self.required_params.extend(["resource", "rel"])

    def set_resource(self, resource):
        self.resource = resource

    def internal_do(self):
        """Synchronize the ExtLink (resource, rel) with self.location.

        - an ExtLink exists and a location is given: its location is updated
        - an ExtLink exists and no location is given: it is deleted
        - no ExtLink exists and a location is given: it is created
        """
        super().internal_do()

        extlink_qs = ExtLink.objects.filter(resource=self.resource, rel=self.rel)

        if extlink_qs.exists():
            extlink = extlink_qs.first()
            if self.location:
                extlink.location = self.location
                extlink.save()
            else:
                extlink.delete()
        elif self.location:
            params = {
                "rel": self.rel,
                "mimetype": self.mimetype,
                "location": self.location,
                "seq": 1,
                "metadata": "",
            }

            cmd = addExtLinkPtfCmd(params)
            cmd.set_resource(self.resource)
            cmd.do()
1473#####################################################################
1474#
1475# updateArticlePtfCmd: update an existing Article
1476# Olivier: 12/06/2020. This function needs major refactoring.
1477# If page_count is not provided, it gets deleted.
1478# There should be a way to pass only attributes to edit
1479#
1480# Needs an Article object (required)
1481#
1482# Exception raised:
1483# - ValueError if the init params are empty
1484#
1485######################################################################
class updateArticlePtfCmd(baseCmd):
    """Update an existing Article (titles, body, authors, page count, PDF datastream, icon).

    An Article must be given (set_article or the 'article' param).
    The keyword update is disabled (see the commented-out addorUpdateKwdsPtfCmd).
    """

    def __init__(self, params=None):
        self.article = None
        self.title_xml = None
        self.title_html = None
        self.title_tex = None
        self.authors = None
        self.page_count = None
        self.use_page_count = True
        self.icon_location = None
        self.body = None
        self.body_tex = None
        self.body_html = None
        self.body_xml = None

        # A `{}` default would be a single dict shared by every call:
        # create a fresh one when no params are given.
        super().__init__({} if params is None else params)

        self.required_params.extend(["article"])

    def set_article(self, article):
        self.article = article

    def internal_do(self):
        """Apply the attributes of the command to the article, then update Solr if needed."""
        super().internal_do()

        container = self.article.my_container
        collection = container.my_collection

        # The title is updated only when its 3 forms are provided.
        if self.title_tex and self.title_html and self.title_xml:
            self.article.title_tex = self.title_tex
            self.article.title_html = self.title_html
            self.article.title_xml = self.title_xml
            self.article.save()

        # As soon as one form of the body is provided, the 3 forms are overwritten
        # (the missing ones are overwritten with their current command value, None by default).
        if self.body_xml or self.body_html or self.body_tex:
            self.article.body_tex = self.body_tex
            self.article.body_html = self.body_html
            self.article.body_xml = self.body_xml
            self.article.save()

        # Authors
        if self.authors:
            params = {"contributors": self.authors}
            cmd = addorUpdateContribsPtfCmd(params)
            cmd.set_resource(self.article)
            cmd.do()

        # Page count: the existing value is removed even if no new value is provided.
        if self.use_page_count:
            qs = self.article.resourcecount_set.filter(name="page-count")
            if qs.exists():
                qs.first().delete()
            if self.page_count:
                seq = self.article.resourcecount_set.count() + 1
                params = {"name": "page-count", "value": self.page_count, "seq": seq}
                cmd = addResourceCountPtfCmd(params)
                cmd.set_resource(self.article)
                cmd.do()

        # Add a DataStream for the PDF
        qs = self.article.datastream_set.filter(mimetype="application/pdf")
        if not qs.exists():
            folder = resolver.get_relative_folder(collection.pid, container.pid, self.article.pid)
            location = os.path.join(folder, self.article.pid + ".pdf")
            params = {
                "rel": "full-text",
                "mimetype": "application/pdf",
                "location": location,
                "seq": self.article.datastream_set.count() + 1,
                "text": "Full (PDF)",
            }
            cmd = addDataStreamPtfCmd(params)
            cmd.set_resource(self.article)
            cmd.do()

        # Image added via ptf-tools for an article
        if self.icon_location:
            params = {"rel": "icon", "location": self.icon_location}
            cmd = addorUpdateExtLinkPtfCmd(params)
            cmd.set_resource(self.article)
            cmd.do()

        # Search engine: only the body and the tex/html titles are sent.
        if self.body or self.title_tex:
            params = {}
            if self.body:
                params["body"] = self.body
            if self.title_tex and self.title_html:
                params["title_tex"] = self.title_tex
                params["title_html"] = self.title_html

            cmd = updateResourceSolrCmd(params)
            cmd.set_resource(self.article)
            cmd.do()
1594#####################################################################
1595#
1596# updateContainerPtfCmd: update an existing Container
1597#
1598# Needs a Container object (required)
1599#
1600# Exception raised:
1601# - ValueError if the init params are empty
1602#
1603######################################################################
class updateContainerPtfCmd(baseCmd):
    """Update the icon of an existing Container.

    params: 'icon_location'.
    A Container must be given (set_resource or the 'resource' param).
    """

    def __init__(self, params=None):
        self.resource = None
        self.icon_location = None

        # A `{}` default would be a single dict shared by every call:
        # create a fresh one when no params are given.
        super().__init__({} if params is None else params)

        self.required_params.extend(["resource"])

    def set_resource(self, resource):
        self.resource = resource

    def internal_do(self):
        """Delegate to addorUpdateExtLinkPtfCmd with rel="icon" and self.icon_location."""
        super().internal_do()

        params = {"rel": "icon", "location": self.icon_location}
        cmd = addorUpdateExtLinkPtfCmd(params)
        cmd.set_resource(self.resource)
        cmd.do()
1625##########################################################################
1626##########################################################################
1627#
1628# Export Commands
1629#
1630##########################################################################
1631##########################################################################
1634class exportExtraDataPtfCmd(baseCmd):
1635 """
1636 Exports additional info, such as checked/false_positive attributes on extid/bibitemid
1638 force_pid is only used when the volume to be published becomes published
1639 Ex: AIF_0_0 becomes AIF_2018. We want to backup data in AIF_2018.json
1640 so that additional are restored when AIF_2018.xml is read
1642 export_all export all extids.
1643 If you want to archive, export_all should be False (checked extids are in the XML)
1644 If you want to store in a temp file (updateXML), then export_all should be True
1645 to preserve new extids found by the matching an not present in the XML
1647 if with_binary_files = True, copy in tempFolder, binary files set by ptf-tools ( extlink(rel='icon') )
1649 results: a json file on disk
1650 """
1652 def __init__(self, params=None):
1653 self.pid = None
1654 self.export_folder = None
1655 self.force_pid = None
1656 self.export_all = True
1657 self.with_binary_files = True
1659 super().__init__(params)
1661 self.required_params.extend(["pid", "export_folder"])
1663 def get_article_extra_info(self, article, export_all=False):
1664 data = None
1666 extids_data = []
1667 for extid in article.extid_set.all():
1668 extid_data = {}
1669 if export_all or not extid.checked or extid.false_positive: 1669 ↛ 1674line 1669 didn't jump to line 1674, because the condition on line 1669 was never false
1670 extid_data["type"] = extid.id_type
1671 extid_data["value"] = extid.id_value
1672 extid_data["checked"] = extid.checked
1673 extid_data["false_positive"] = extid.false_positive
1674 if extid_data: 1674 ↛ 1667line 1674 didn't jump to line 1667, because the condition on line 1674 was never false
1675 extids_data.append(extid_data)
1677 references_data = []
1678 for bib in article.bibitem_set.all():
1679 bibids_data = []
1680 for bibid in bib.bibitemid_set.all():
1681 bibid_data = {}
1682 if export_all or not bibid.checked or bibid.false_positive:
1683 bibid_data["type"] = bibid.id_type
1684 bibid_data["value"] = bibid.id_value
1685 bibid_data["checked"] = bibid.checked
1686 bibid_data["false_positive"] = bibid.false_positive
1688 if bibid_data:
1689 bibids_data.append(bibid_data)
1691 if bibids_data:
1692 references_data.append({"seq": bib.sequence, "bibids": bibids_data})
1694 icon = None
1696 for extlink in article.extlink_set.filter(rel="icon"):
1697 if self.with_binary_files is True:
1698 icon = extlink.location
1700 # copy des imgs associées via ptf-tools
1701 from_path = os.path.join(settings.RESOURCES_ROOT, extlink.location)
1702 to_path = os.path.join(self.export_folder, extlink.location)
1703 resolver.create_folder(os.path.dirname(to_path))
1704 resolver.copy_file(from_path, to_path)
1706 if (
1707 extids_data
1708 or references_data
1709 or article.date_published
1710 or article.date_online_first
1711 or icon
1712 ):
1713 data = {
1714 "pid": article.pid,
1715 "doi": article.doi,
1716 "extids": extids_data,
1717 "references": references_data,
1718 }
1720 if export_all and icon:
1721 data["icon"] = icon
1723 if export_all and article.date_published: 1723 ↛ 1724line 1723 didn't jump to line 1724, because the condition on line 1723 was never true
1724 data["date_published"] = article.date_published
1726 if export_all and article.date_pre_published: 1726 ↛ 1727line 1726 didn't jump to line 1727, because the condition on line 1726 was never true
1727 data["date_pre_published"] = article.date_pre_published
1729 if export_all and article.date_online_first:
1730 data["date_online_first"] = article.date_online_first
1732 if export_all:
1733 data["show_body"] = article.show_body
1734 data["do_not_publish"] = article.do_not_publish
1736 if ( 1736 ↛ 1741line 1736 didn't jump to line 1741
1737 export_all
1738 and settings.SITE_NAME == "ptf_tools"
1739 and not ((len(sys.argv) > 1 and sys.argv[1] == "test") or "pytest" in sys.modules)
1740 ):
1741 try:
1742 data["doi_status"] = article.doibatch.status
1743 data["doibatch_id"] = article.doibatch.id
1744 data["doibatch_xml"] = article.doibatch.xml
1745 except ObjectDoesNotExist:
1746 data["doi_status"] = 0
1748 return data
def get_container_extra_info(self, container, export_all=False):
    """
    Collect the internal (non-XML) data of a container and of its articles.

    The returned dict always holds the container "pid" and an "articles" list
    (one entry per article for which get_article_extra_info returns a truthy value).
    "deployed_date" is added when a Mersenne site exists for the collection,
    self.force_pid is not set and the container has a deployed date.
    "icon" is added only when export_all is true and an icon ext-link exists.

    If self.with_binary_files is True, each icon file is also copied from
    settings.MERSENNE_TEST_DATA_FOLDER to self.export_folder.
    """
    info = {"pid": container.pid}

    site = model_helpers.get_site_mersenne(container.my_collection.pid)

    # When self.force_pid is set, an article is being moved from the 0_0_0 issue to
    # its final issue; in that case the deployed_date of the 0_0_0 issue is not kept.
    if site and not self.force_pid:
        when_deployed = container.deployed_date(site)
        if when_deployed:
            info["deployed_date"] = when_deployed

    icon_location = None
    for icon_link in container.extlink_set.filter(rel="icon"):
        # If several icon links exist, the last one wins.
        icon_location = icon_link.location
        if self.with_binary_files is not True:
            continue

        # Copy the images attached via ptf-tools into the export folder.
        source = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, icon_link.location)
        destination = os.path.join(self.export_folder, icon_link.location)
        resolver.create_folder(os.path.dirname(destination))
        resolver.copy_file(source, destination)

    if export_all and icon_location:
        info["icon"] = icon_location

    # Articles without any extra data (falsy result) are left out of the list.
    per_article = (
        self.get_article_extra_info(article, export_all)
        for article in container.article_set.all()
    )
    info["articles"] = [extra for extra in per_article if extra]

    return info
def internal_do(self):
    """
    Dump the internal (non-XML) data of the resource self.pid to a JSON archive file.

    The data is produced by the method named get_<classname>_extra_info, where
    <classname> is the lower-cased classname of the resource. The target file name
    is computed by resolver.get_archive_filename from self.export_folder, the top
    collection pid, the container pid (or self.force_pid when set) and, for an
    article, the article pid.

    Raises:
        exceptions.ResourceDoesNotExist: no resource matches self.pid.
        ValueError: the resource type has no get_<classname>_extra_info method.
    """
    super().internal_do()

    resource = model_helpers.get_resource(self.pid)
    if not resource:
        raise exceptions.ResourceDoesNotExist(f"Resource {self.pid} does not exist")

    obj = resource.cast()
    classname = obj.classname.lower()

    # Resolve the handler before doing any other work: an unsupported resource type
    # must fail with an explicit error instead of "'NoneType' object is not callable".
    fct_name = f"get_{classname}_extra_info"
    ftor = getattr(self, fct_name, None)
    if ftor is None:
        raise ValueError(
            f"No extra data can be exported for resource {self.pid} ({obj.classname})"
        )

    # The article pid is only part of the file name when an article is exported.
    article_pid = self.pid if classname == "article" else None

    container = obj.get_container()
    container_pid = self.force_pid if self.force_pid else container.pid
    collection = container.get_top_collection()

    data = ftor(obj, self.export_all)

    file = resolver.get_archive_filename(
        self.export_folder,
        collection.pid,
        container_pid,
        "json",
        do_create_folder=True,
        article_pid=article_pid,
    )

    # myconverter serializes the values json cannot handle natively.
    with open(file, "w", encoding="utf-8") as f:
        json.dump(data, f, default=myconverter)
class exportPtfCmd(baseCmd):
    """
    Generate the Article/Container/Collection XML

    Write on disk if export_folder is given as parameter
    Copy binary files if with_binary_files = True
    results: unicode string
    """

    def __init__(self, params=None):
        """
        Set the default export options, then let baseCmd apply `params`.

        "pid" is appended to the required parameters.
        """
        self.pid = None
        self.with_body = True
        self.with_djvu = True  # No djvu in Mersenne web sites
        self.article_standalone = False  # PCJ editor sets to True

        # Export the JSON of the internal data (false_ids...).
        # This requires self.export_folder to be set.
        self.with_internal_data = False

        # Copy the binary files (PDF...) into the export_folder
        self.with_binary_files = False

        self.export_folder = None

        # Controls the source folder of the binary files
        self.binary_files_folder = settings.RESOURCES_ROOT

        # Whether or not to add internal metadata (deployed_date) to the XML
        self.for_archive = False

        # Ultimately allows excluding the articles flagged as not to be published
        self.export_to_website = False

        # The XML shown in the export tab does not contain all the metadata
        self.full_xml = True

        super().__init__(params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """
        Render the XML of the resource self.pid and optionally write it to disk.

        The template is chosen from the resource classname (and ctype for a
        container). When self.export_folder is set, the XML is written to the file
        returned by resolver.get_archive_filename, and binary files are copied when
        self.with_binary_files is set (collections and containers only).

        Returns:
            The rendered XML as a str.

        Raises:
            exceptions.ResourceDoesNotExist: no resource matches self.pid.
            ValueError: the resource is not an Article, a Container or a Collection.
        """
        super().internal_do()

        resource = model_helpers.get_resource(self.pid)
        if not resource:
            raise exceptions.ResourceDoesNotExist(f"Resource {self.pid} does not exist")

        obj = resource.cast()

        # export Book ? need a visitor ? see oai_helpers
        # item_name is the template context key under which obj is passed.
        if obj.classname == "Article":
            template_name = "oai/common-article_eudml-article2.xml"
            item_name = "article"
        elif obj.classname == "Container":
            if obj.ctype == "issue":
                template_name = "oai/common-issue_eudml-article2.xml"
                item_name = "container"
            else:
                # Any container that is not an issue is rendered with the book template.
                template_name = "oai/book_bits.xml"
                item_name = "book"
        elif obj.classname == "Collection":
            template_name = "collection.xml"
            item_name = "collection"
        else:
            raise ValueError("Only articles, containers or collections can be exported")

        # The JSON of internal data is only exported for containers, next to the XML.
        if self.export_folder and self.with_internal_data and obj.classname == "Container":
            params = {
                "pid": self.pid,
                "export_folder": self.export_folder,
                "with_binary_files": self.with_binary_files,
            }
            exportExtraDataPtfCmd(params).do()

        p = model_helpers.get_provider("mathdoc-id")
        # The templates receive the opposite flag: for_export is False for an archive.
        for_export = not self.for_archive
        safetext_xml_body = render_to_string(
            template_name,
            {
                item_name: obj,
                "no_headers": True,
                "provider": p.name,
                "with_body": self.with_body,
                "with_djvu": self.with_djvu,
                "for_disk": True,
                "for_export": for_export,
                "full_xml": self.full_xml,
                "export_to_website": self.export_to_website,
                "article_standalone": self.article_standalone,
            },
        )
        xml_body = str(safetext_xml_body)
        if not self.full_xml:
            # Re-parse the partial XML (comments removed, recovering from errors)
            # and serialize it again pretty-printed.
            parser = etree.XMLParser(
                huge_tree=True,
                recover=True,
                remove_blank_text=False,
                remove_comments=True,
                resolve_entities=True,
            )
            tree = etree.fromstring(xml_body.encode("utf-8"), parser=parser)
            xml_body = etree.tostring(tree, pretty_print=True).decode("utf-8")

        if self.export_folder:
            if obj.classname == "Collection":
                # Export of a collection XML: we don't attempt to write in the top collection
                file = resolver.get_archive_filename(
                    self.export_folder, obj.pid, None, "xml", True
                )
                with open(file, "w", encoding="utf-8") as f:
                    f.write(xml_body)

                if self.with_binary_files:
                    resolver.copy_binary_files(obj, self.binary_files_folder, self.export_folder)

            elif obj.classname == "Container":
                issue = obj
                collection = obj.get_top_collection()

                file = resolver.get_archive_filename(
                    self.export_folder, collection.pid, issue.pid, "xml", True
                )

                with open(file, "w", encoding="utf-8") as f:
                    f.write(xml_body)

                if self.with_binary_files:
                    resolver.copy_binary_files(issue, self.binary_files_folder, self.export_folder)

                    # Also copy the binary files of the articles of the issue.
                    qs = issue.article_set.all()
                    if self.for_archive:
                        # Articles flagged do_not_publish are skipped for an archive.
                        qs = qs.exclude(do_not_publish=True)
                    for article in qs:
                        resolver.copy_binary_files(
                            article, self.binary_files_folder, self.export_folder
                        )
            elif obj.classname == "Article":
                collection = obj.get_top_collection()
                # NOTE(review): neither a container pid nor an article_pid is passed
                # here, so the call has the same shape as the Collection export above
                # - confirm that this is the intended target file for an article.
                file = resolver.get_archive_filename(
                    self.export_folder, collection.pid, None, "xml", True
                )

                with open(file, "w", encoding="utf-8") as f:
                    f.write(xml_body)

        return xml_body
class publishResourcePtfCmd(addPtfCmd):
    """
    Publish a resource.

    Publishing a container creates a pub-date for all its articles/book-parts;
    publishing an article creates a pub-date for that article.
    """

    def __init__(self, params=None):
        # Keep the raw params: set_resource hands them over to the database command.
        self.params = params
        super().__init__(params)

    def set_resource(self, resource):
        """
        Create the database command matching the type of `resource`.

        A resource whose classname contains "Article" is published with
        publishArticleDatabaseCmd, anything else with publishContainerDatabaseCmd.
        """
        target = resource.cast()

        if "Article" not in target.classname:
            self.cmd = publishContainerDatabaseCmd(self.params)
            self.cmd.set_container(target)
            return

        self.cmd = publishArticleDatabaseCmd(self.params)
        self.cmd.set_article(target)
def get_or_create_publisher(name):
    """
    Return the publisher called `name`, creating it first when it does not exist.

    An existing publisher is returned as found by model_helpers.get_publisher;
    otherwise the result of addPublisherPtfCmd(...).do() is returned.
    """
    existing = model_helpers.get_publisher(name)
    if existing is not None:
        return existing

    # Unknown publisher: build a minimal data object that only carries the name.
    xpublisher = PublisherData()
    xpublisher.name = name
    return addPublisherPtfCmd({"xobj": xpublisher}).do()