Coverage for apps/ptf/tex.py: 71%
536 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-19 19:20 +0000
1import os
2import subprocess
3import tempfile
5import pypdf
6from pylatexenc.latexencode import unicode_to_latex
8from django.conf import settings
10from ptf import model_helpers
11from ptf import models
12from ptf import utils
13from ptf.cmds.xml import xml_utils
14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml
15from ptf.display import resolver
def get_tex_keyword_date_published(colid, article, lang=""):
    """
    Return the TeX command used to write the publication date of an article.

    colid: collection id, compared against two hard-coded lists of collections
    article: object exposing ``date_online_first`` (only read for the second list)
    lang: when not empty, the translation keyword is returned whatever the collection
    """
    posted = "\\dateposted"
    published = "\\datepublished"

    # These collections write their publication date with \dateposted
    keyword = posted if colid in ("AHL", "AIF", "OGEO", "JTNB") else published

    # These collections fall back to \dateposted when there is no online-first date
    uses_online_first = colid in ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
    if uses_online_first and not article.date_online_first:
        keyword = posted

    return keyword if lang == "" else "\\CDRsetmeta{traduction_date_posted}"
def get_tex_keyword_date_online_first(colid, article, lang=""):
    """
    Return the TeX command used to write the online-first date.

    The three parameters are accepted but not used: the keyword is always \\dateposted.
    """
    online_first_keyword = "\\dateposted"
    return online_first_keyword
def read_tex_file(filename):
    """
    Read a tex file, trying utf-8 first and falling back to iso-8859-1.

    Returns the content as a list of lines (line endings kept, as given by
    readlines()). An empty list is returned when filename is not an existing file.
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as f_:
            return f_.readlines()
    except UnicodeDecodeError:
        # Not valid utf-8: read the whole file again as iso-8859-1
        with open(filename, encoding="iso-8859-1") as f_:
            return f_.readlines()
def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Re-encode article_path/from_name as utf-8 and copy it to mathdoc-tex as article_path/to_name.

    The content is read with read_tex_file, written to a local temporary file
    under LOG_DIR/tmp/ (the temporary file is not removed), then sent with scp.
    """
    source_lines = read_tex_file(os.path.join(article_path, from_name))

    tex_user = settings.MERSENNE_TEX_USER
    tmp_prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_prefix)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_prefix, delete=False
    ) as tmp_file:
        tmp_file.write("".join(source_lines))
    local_copy = tmp_file.name  # ex: /tmp/Rxsft

    # copy to mersenne-tex
    remote_target = os.path.join(article_path, to_name)
    utils.execute_cmd(f"scp {local_copy} {tex_user}@mathdoc-tex:{remote_target}")
def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write the concatenation of lines to a utf-8 file and return the path written.

    create_temp_file=False: filename is (over)written and returned.
    create_temp_file=True: filename is ignored; a new temporary file is created
    with the LOG_DIR/tmp/ prefix (it is not deleted) and its path is returned.
    """
    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as target:
            target.write("".join(lines))
        return filename

    tmp_prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_prefix)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_prefix, delete=False
    ) as tmp_file:
        tmp_file.write("".join(lines))
    return tmp_file.name  # ex: /tmp/Rxsft
def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the publication date command(s) in new_lines at begin_document_pos.

    new_lines is modified in place. The returned value is begin_document_pos
    shifted by the number of lines inserted (0, 1 or 2).

    Nothing is inserted when article.date_published is None: both lines written
    here need that date, so the function returns early instead of failing on an
    unset keyword / a None date in the thematic issue branch.
    """
    if article.date_published is None:
        return begin_document_pos

    date_str = article.date_published.strftime("%Y-%m-%d")
    keyword = get_tex_keyword_date_published(colid, article, lang)
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is not None:
        is_thematic_issue = len(container.title_html) > 0
        is_issue_finalized = not container.with_online_first
        is_thematic_finalized = is_thematic_issue and is_issue_finalized

        if is_thematic_finalized and article.date_online_first is None:
            # Finalized thematic issue where the article did not go through online first
            # => Add \datepublished so that "Issue date :" appears in the PDF
            keyword2 = "\\datepublished"
            if keyword2 != keyword:
                new_lines.insert(begin_document_pos, f"{keyword2}{{{date_str}}}\n")
                begin_document_pos += 1

    return begin_document_pos
def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online-first date command in new_lines at begin_document_pos.

    new_lines is modified in place. Returns begin_document_pos + 1 when a line
    was inserted, begin_document_pos unchanged when the article has no
    online-first date.
    """
    online_first = article.date_online_first
    if online_first is None:
        return begin_document_pos

    tex_keyword = get_tex_keyword_date_online_first(colid, article, lang)
    new_lines.insert(begin_document_pos, f'{tex_keyword}{{{online_first.strftime("%Y-%m-%d")}}}\n')
    return begin_document_pos + 1
def insert_end_page(new_lines, article, colid, begin_document_pos):
    """
    Insert the command defining the last page right after position begin_document_pos.

    new_lines is modified in place when article.lpage is set. colid is not used.
    begin_document_pos is returned unchanged.
    """
    last_page = article.lpage
    if last_page:
        end_page_cmd = "".join(
            ["\\makeatletter\\def\\cdr@end@page{", last_page, "}\\makeatother\n"]
        )
        new_lines.insert(begin_document_pos + 1, end_page_cmd)

    return begin_document_pos
def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source Tex.

    lines is a list of lines of the source Tex; it is not modified.

    Blank lines and comment lines (first non-blank character is %) are copied
    as is. For the other lines:
      - existing date commands are replaced by the article dates
        (with replace_frontpage_only they are dropped and re-added before \begin{document})
      - "published" is removed from the \documentclass options to allow compilation
      - the \cdr@end@page definition is dropped (with replace_frontpage_only it
        is re-added after \begin{document} by insert_end_page)
      - with replace_frontpage_only, \bibliography{name} becomes \bibliography{name_FP}

    Returns (new_lines, bib_name). bib_name is "" unless a \bibliography was renamed.
    """
    new_lines = []
    bib_name = ""

    keyword_date_published = get_tex_keyword_date_published(colid, article, lang)
    keyword_date_online_first = get_tex_keyword_date_online_first(colid, article, lang)
    found_date_online_first = False
    found_date_published = False
    begin_document_pos = -1

    any_date_command = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )

    for line in lines:
        # j: position of the first character that is neither a space nor a tab
        j = len(line) - len(line.lstrip(" \t"))

        if j >= len(line) or line[j] == "%":
            # Whitespace-only line or comment
            new_lines.append(line)
            continue

        if replace_frontpage_only and line.startswith(any_date_command, j):
            # Dropped: the dates are inserted before \begin{document} after the loop
            continue

        if line.startswith(f"{keyword_date_published}{{", j):
            # replace existing \datepublished
            found_date_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith(f"{keyword_date_online_first}{{", j):
            # replace existing \dateposted
            found_date_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith("\\begin{document", j):
            # \begin{document} add dates if not present
            # Use the real index in new_lines: source lines may have been dropped
            # or replaced by 0 or 2 lines, so a position derived from the source
            # line number can point to the wrong line.
            begin_document_pos = len(new_lines)
            new_lines.append(line)

        elif line.startswith(("\\documentclass", "{\\documentclass"), j):
            # remove published from \documentclass to allow compilation
            line = (
                line.replace(",published,", ",")
                .replace(",published", "")
                .replace("published", "")
            )
            new_lines.append(line)

        elif line.startswith("\\makeatletter\\def\\cdr@end@page", j):
            # Command to specify the last page (present in the front page)
            # Move it after \begin{document}
            pass

        elif (
            replace_frontpage_only
            and line.startswith("\\bibliography", j)
            and not line.startswith("\\bibliographystyle", j)
        ):
            end = line.find("}")
            if end > 0:
                # 14 == len("\\bibliography{")
                bib_name = line[j + 14 : end]
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")

        else:
            new_lines.append(line)

    if begin_document_pos > 0 and not found_date_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos > 0 and not found_date_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos > 0:
        begin_document_pos = insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name
def protect_tex(lines, keyword="published"):
    r"""
    Add keyword to the options of the \documentclass of a cedram tex file.

    lines is a list of lines of the source Tex; a new list is returned.
    Comment lines are ignored and classes other than {cedram...} are left
    untouched. The closing "]" of the options may be on the \documentclass
    line or on a following line.
    """
    protected = []
    awaiting_bracket = False  # \documentclass seen, "]" not found yet

    for line in lines:
        # Position of the first character that is neither a space nor a tab
        start = len(line) - len(line.lstrip(" \t"))
        is_code = start < len(line) and line[start] != "%"

        if is_code and line.startswith(("\\documentclass", "{\\documentclass"), start):
            # add published to \documentclass after compilation
            closing = line.find("]")
            if closing > 0:
                if line.find("{cedram") > 0:  # Ignore {article}
                    line = line[:closing] + "," + keyword + line[closing:]
            else:
                awaiting_bracket = True
        elif is_code and awaiting_bracket:
            closing = line.find("]")
            if closing == start:
                # The line starts with "]": put the keyword on its own line before it
                if line.find("{cedram") > 0:  # Ignore {article}
                    protected.append(f",{keyword}\n")
                awaiting_bracket = False
            elif closing > -1:
                if line.find("{cedram") > 0:  # Ignore {article}
                    line = line[:closing] + "," + keyword + line[closing:]
                awaiting_bracket = False

        protected.append(line)

    return protected
def get_tex_corresponding_emails(author_contributions):
    """
    Return the LaTeX-encoded emails of the corresponding authors.

    Only contributions flagged as corresponding with a non-empty email are
    kept. The underscore escaping added by unicode_to_latex is reverted.
    """
    return [
        unicode_to_latex(contrib.email).replace(r"\_", r"_")
        for contrib in author_contributions
        if contrib.corresponding and contrib.email
    ]
def get_tex_authors(author_contributions):
    """
    Return the tex lines describing the authors.

    For each contribution:
        \\author{\\firstname{Antoine} \\lastname{Lavoisier}}
        \\address{Rue sans aplomb, Paris, France}    (one per address)
        \\email{a-lavois@lead-free-univ.edu}         (corresponding authors with an email)
    followed by an empty line.
    """
    tex_lines = []

    for contrib in author_contributions:
        given = unicode_to_latex(contrib.first_name)
        family = unicode_to_latex(contrib.last_name)

        author_parts = [f"\\author{{\\firstname{{{given}}} \\lastname{{{family}}}"]
        if contrib.orcid:
            author_parts.append(f"\\CDRorcid{{{contrib.orcid}}}")
        if contrib.equal_contrib:
            author_parts.append("\\IsEqualContrib")
        if contrib.deceased_before_publication:
            author_parts.append("\\dead")
        author_parts.append("}\n")
        tex_lines.append("".join(author_parts))

        for contrib_address in contrib.contribaddress_set.all():
            tex_lines.append(f"\\address{{{unicode_to_latex(contrib_address.address)}}}\n")

        if contrib.corresponding and len(contrib.email) > 0:
            tex_lines.append(f"\\email{{{unicode_to_latex(contrib.email)}}}\n")

        tex_lines.append("\n")

    return tex_lines
def create_tex_for_pcj(article):
    """
    Build the tex source of a PCJ article and return it as a list of strings.

    The file is made of a cedram preamble (issue info, DOI, RDOI, PCI section,
    article type, optional conference and corresponding emails), the title,
    the authors, the first abstract (if any), the publication date and a
    document body that includes the pdf of the article (article_<pid>.pdf).
    """
    pci = article.get_pci_section()

    extid = model_helpers.get_extid(article, "rdoi")
    rdoi = extid.id_value if extid is not None else ""

    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{"
        + article.my_container.volume
        + "}{}{}{"
        + article.my_container.year
        + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        # End the line like every other preamble command, otherwise the next
        # command (\setPCIconf or \PCIcorresp) is glued to this one
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}\n",
    ]

    conf = article.get_conference()
    if len(conf) > 0:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    corresponding_emails = get_tex_corresponding_emails(author_contributions)
    for email in corresponding_emails:
        lines.append(f"\\PCIcorresp{{{email}}}\n")

    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # The HTML tags are replaced by placeholders before the LaTeX encoding,
    # then the placeholders are replaced by the matching tex commands.
    tex_commands = (
        ("i", "\\protect\\emph{"),
        ("sup", "\\protect\\textsuperscript{"),
        ("sub", "\\protect\\textsubscript{"),
    )
    title = article.title_tex
    for tag, _ in tex_commands:
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, command in tex_commands:
        title = title.replace(f"|||{tag}|||", command).replace(f"|||/{tag}|||", "}")
    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        abstract = abstracts.first()
        value = get_tex_from_xml(abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    # Placeholder date when the article is not published yet
    date_ = article.date_published.strftime("%Y-%m-%d") if article.date_published else "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    lines.append("\\begin{document}\n")
    lines.append("\\maketitle\n")
    article_pdf = f"article_{article.pid}.pdf"
    lines.append(f"\\PCIincludepdf{{{article_pdf}}}\n")

    lines.append("\\end{document}\n")

    return lines
def compile_tex(lines, article, update=False):
    """
    1) Create a tex file from the list of lines
    2) Upload the file to mathdoc-tex (+ the pdf for PCJ)
    3) Compile the file
    4) Replace the pdf in /mersenne_test_data
    5) linearize the pdf
    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)

    Returns the path of the pdf in the test data folder, or None when the
    site is not ptf_tools.
    Raises an Exception when a non-PCJ article has no ojs-id.
    """
    if settings.SITE_NAME != "ptf_tools":
        # Only allowed on ptf-tools
        return

    tex_user = settings.MERSENNE_TEX_USER
    container = article.my_container
    colid = container.my_collection.pid
    issue_folder = resolver.get_cedram_issue_tex_folder(colid, container.pid)
    article_pdf = ""

    if colid == "PCJ":
        article_tex_name = article.pid
        article_path = os.path.join(issue_folder, article_tex_name)
        article_pdf = f"article_{article.pid}.pdf"

        if not update:
            # Create the article folder
            utils.execute_cmd(f"ssh {tex_user}@mathdoc-tex mkdir -p {article_path}")

            # copy the pdf to mersenne-tex
            source_folder = os.path.join(
                settings.RESOURCES_ROOT,
                resolver.get_relative_folder(colid, container.pid, article.pid),
            )
            source_pdf = os.path.join(source_folder, article.pid + ".pdf")
            utils.execute_cmd(
                f"scp {source_pdf} {tex_user}@mathdoc-tex:{article_path}/{article_pdf}"
            )
    else:
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_folder, article_tex_name)

    # Write the tex file locally, then copy it to mersenne-tex
    remote_tex_file = os.path.join(article_path, article_tex_name + ".tex")
    local_tex_file = write_tex_file("", lines, create_temp_file=True)
    utils.execute_cmd(f"scp {local_tex_file} {tex_user}@mathdoc-tex:{remote_tex_file}")

    # recompile article: execute the script to compile
    bin_folder = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(
        f"ssh {tex_user}@mathdoc-tex 'bash -s' -- < {bin_folder}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    )

    # replace pdf
    compiled_pdf = os.path.join(article_path, article_tex_name + ".pdf")
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER,
        resolver.get_relative_folder(colid, container.pid, article.pid),
        article.pid + ".pdf",
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(compiled_pdf, to_path)

    return to_path
def add_outline(reader, writer, outlines, parent=None):
    """
    Copy the outline items of reader into writer, keeping the nesting.

    outlines is a list whose elements are either an outline item or a nested
    list holding the children of the item that precedes it.
    parent is the writer outline item under which the items are added
    (None for the top level).

    The optional arguments of add_outline_item are passed by keyword: their
    order is not the same in every pypdf release, and positional values can
    end up in the wrong parameter (ex: the fit received as "italic").
    """
    child_parent = parent
    for item in outlines:
        if isinstance(item, list):
            # Children of the last item added at this level
            add_outline(reader, writer, item, child_parent)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        if item["/Type"] == "/XYZ":
            # Keep the position of the destination in the page
            fit = pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1))
            child_parent = writer.add_outline_item(
                title, page_num, parent=parent, bold=False, italic=False, fit=fit
            )
        else:
            child_parent = writer.add_outline_item(
                title, page_num, parent=parent, bold=False, italic=False
            )
def test():
    """
    Manual check of the front page merge, using hard-coded local pdf files.

    Builds test_merged.pdf from the first page of test_FP.pdf and the pages
    of test_content.pdf after the first one, copies the outline and the named
    destinations, adds the metadata of the first Article found in the
    database, then stops the interpreter by raising SystemExit.
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: first page of the front page pdf only
    for index, current_page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(current_page)

    # Content: every page but the first one
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    # The page label experiment that used to follow was unreachable and has been removed
    raise SystemExit
def add_metadata(article, in_pdf, out_pdf):
    """
    Run exiftool to set the metadata of out_pdf.

    The tags of in_pdf are copied (-tagsFromFile), then the title and author
    found in in_pdf and the values taken from the article, its container and
    its collection are set explicitly.

    exiftool is called with a list of arguments, without a shell: values with
    quotes or other shell special characters (titles, keywords, publisher or
    issue names) are passed as they are and can not break the command.

    Returns the output of exiftool (bytes).
    Raises subprocess.CalledProcessError if exiftool returns a non-zero exit status.
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)

    # metadata is None when the PDF has no document information dictionary
    metadata = reader.metadata or {}
    args = ["exiftool", "-tagsFromFile", in_pdf]

    if in_pdf == out_pdf:
        args.append("-overwrite_original_in_place")

    container = article.my_container
    collection = article.get_collection()

    _, kwds, _ = article.get_kwds_by_type()
    keywords = ", ".join([str(x.value) for x in kwds])

    lang = ""
    if article.lang == "fr":
        lang = "fr-FR"
    elif article.lang == "en":
        lang = "en-GB"

    for pdf_key, tag in (("/Title", "Title"), ("/Author", "Author")):
        if pdf_key in metadata:
            args.append(f"-{tag}={metadata[pdf_key]}")

    args.append("-Creator=Centre Mersenne")
    args.append("-Subject=")
    if lang:
        # NOTE(review): the other XMP tags are written "xmp-<group>:<tag>";
        # "xmp-dc-Language" looks like a typo for "xmp-dc:Language" - confirm before changing
        args.append(f"-xmp-dc-Language={lang}")
    args.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    args.append(f"-xmp-prism:DOI={article.doi}")
    args.append(f"-Keywords={keywords}")
    args.append(f"-xmp-xmp:Keywords={keywords}")
    args.append(f"-xmp-pdf:Keywords={keywords}")
    args.append("-xmp-pdf:Copyright=© The author(s)")

    if container.volume:
        args.append(f"-xmp-prism:Volume={container.volume}")
    if container.number:
        args.append(f"-xmp-prism:Number={container.number}")
    if collection.issn:
        args.append(f"-xmp-prism:ISSN={collection.issn}")
    if collection.e_issn:
        args.append(f"-xmp-prism:EISSN={collection.e_issn}")
    if container.title_tex:
        args.append(f"-xmp-prism:IssueName={container.title_tex}")
    args.append(out_pdf)

    output = subprocess.check_output(args)
    return output
def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Build the final pdf of an article from a new front page and the existing content.

    At this point the PDF has been recompiled, possibly with a new template.
    The 1st page of the new PDF (fp_pdf_file_name) is used with the other
    pages of the saved PDF (content_pdf_file_name). Both files are on
    mathdoc-tex: they are copied locally in LOG_DIR/tmp/, merged with pypdf
    (pages, outline, named destinations), fixed with update_pdf.sh, given
    their metadata, and the result is copied to mathdoc-tex as final_pdf_file_name.
    """
    user = settings.MERSENNE_TEX_USER
    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_FP")
    cmd = f"scp {user}@mathdoc-tex:{fp_pdf_file_name} {local_fp_pdf}"
    utils.execute_cmd(cmd)

    local_content_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_content")
    cmd = f"scp {user}@mathdoc-tex:{content_pdf_file_name} {local_content_pdf}"
    utils.execute_cmd(cmd)

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: first page of the recompiled pdf only
    for index, current_page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(current_page)

    # Content: every page of the saved pdf but the first one
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    cmd = f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}"
    utils.execute_cmd(cmd)

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # Without a first page there is nothing to relabel: the merged pdf is the
    # final one. (The relabelled file is only written when article.fpage is set,
    # uploading it unconditionally would send a missing or outdated file.)
    pdf_to_upload = merged_pdf

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        local_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf")

        is_roman = False
        try:
            first_page = int(article.fpage)
        except ValueError:
            first_page = xml_utils.roman_to_int(article.fpage)
            is_roman = True

        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # NOTE(review): the labelled range ends at index first_page - 1 and no
        # "start" value is given - confirm this gives the expected page numbers
        label_style = "/r" if is_roman else "/D"
        writer.set_page_label(page_index_from=0, page_index_to=first_page - 1, style=label_style)
        writer.write(local_pdf)
        writer.close()
        pdf_to_upload = local_pdf

    # copy to mersenne-tex
    cmd = f"scp {pdf_to_upload} {user}@mathdoc-tex:{final_pdf_file_name}"
    utils.execute_cmd(cmd)
723def compile_article(
724 article,
725 colid,
726 issue_id,
727 article_path,
728 article_tex_name,
729 replace_frontpage_only=False,
730 skip_compilation=False,
731 lang="",
732):
733 user = settings.MERSENNE_TEX_USER
735 if lang != "": 735 ↛ 736line 735 didn't jump to line 736, because the condition on line 735 was never true
736 article_tex_name += "-" + lang
738 article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
739 # Regular compilation: compiled_pdf and final_pdf are the same
740 # recompilation of the front page: compiled_pdf is the entire pdf with the new front page
741 # final_pdf is the pdf after the merge (new front page; old content)
742 compiled_pdf_file_name = final_pdf_file_name = os.path.join(
743 article_path, article_tex_name + ".pdf"
744 )
745 content_pdf_file_name = compiled_pdf_file_name + "_SAV"
747 # Save the pdf file
748 cmd = f"ssh {user}@mathdoc-tex cp {compiled_pdf_file_name} {content_pdf_file_name}"
749 utils.execute_cmd(cmd)
751 # Save the cfg file (no cfg for translations)
752 if lang == "": 752 ↛ 757line 752 didn't jump to line 757, because the condition on line 752 was never false
753 cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name}_SAV"
754 utils.execute_cmd(cmd)
756 # create temporarly file ! attention sur ptf-tools apache n'a pas le droit d'écrire ds /tmp ?!.
757 prefix = os.path.join(settings.LOG_DIR, "tmp/")
758 resolver.create_folder(prefix)
760 if replace_frontpage_only:
761 # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files, thus preserving existing files
762 article_tex_name2 = article_tex_name + "_FP"
764 cmd = f"ssh {user}@mathdoc-tex rm -f {os.path.join(article_path, article_tex_name2)}.*"
765 utils.execute_cmd(cmd)
767 article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
768 article_tex_file_name2 = os.path.join(article_path, article_tex_name2 + ".tex")
769 cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name2}"
770 utils.execute_cmd(cmd)
772 article_cfg_file_name2 = os.path.join(article_path, article_tex_name2 + ".cfg")
773 cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name2}_SAV"
774 utils.execute_cmd(cmd)
776 article_cdrdoidates_file_name = os.path.join(
777 article_path, article_tex_name + ".cdrdoidates"
778 )
779 if os.path.isfile(article_cdrdoidates_file_name): 779 ↛ 780line 779 didn't jump to line 780, because the condition on line 779 was never true
780 article_cdrdoidates_file_name2 = os.path.join(
781 article_path, article_tex_name2 + ".cdrdoidates"
782 )
783 cmd = f"ssh {user}@mathdoc-tex cp {article_cdrdoidates_file_name} {article_cdrdoidates_file_name2}"
784 utils.execute_cmd(cmd)
786 article_tex_name = article_tex_name2
787 article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
788 compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")
789 final_pdf_file_name = compiled_pdf_file_name + ".new"
791 if not skip_compilation: 791 ↛ 843line 791 didn't jump to line 843, because the condition on line 791 was never false
792 # Remove \ItIsPublished from the cfg file
793 if lang == "": 793 ↛ 799line 793 didn't jump to line 799, because the condition on line 793 was never false
794 cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''
795 utils.execute_cmd(cmd)
796 cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''
797 utils.execute_cmd(cmd)
799 article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
801 # Save the tex file
802 cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name}_SAV"
803 utils.execute_cmd(cmd)
805 lines = read_tex_file(article_tex_file_name)
806 new_lines, bib_name = replace_dates_in_tex(
807 lines, article, colid, replace_frontpage_only, lang=lang
808 )
810 if bib_name and replace_frontpage_only:
811 convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")
813 f = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", prefix=prefix, delete=False)
814 fpath = f.name # ex: /tmp/Rxsft
815 f.write("".join(new_lines))
816 f.close()
818 # copy to mersenne-tex
819 cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"
820 utils.execute_cmd(cmd)
821 # os.unlink(f.name)
823 # recompile article
824 ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
825 # execute script to compile
826 cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"
827 utils.execute_cmd(cmd)
829 # Protect the tex file with the 'published' option
830 new_lines = protect_tex(new_lines)
832 # create temporarly file ! attention sur ptf-tools apache n'a pas le droit d'écrire ds /tmp ?!.
833 prefix = os.path.join(settings.LOG_DIR, "tmp/")
834 f = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", prefix=prefix, delete=False)
835 fpath = f.name # ex: /tmp/Rxsft
836 f.write("".join(new_lines))
837 f.close()
839 # copy to mersenne-tex
840 cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"
841 utils.execute_cmd(cmd)
843 if replace_frontpage_only:
844 # At the point the PDF has been recompiled, possibly with a new template
845 # Use the 1st page of the new PDF with the other pages of the .pdf_SAV
846 replace_front_page(
847 article,
848 article_tex_name,
849 compiled_pdf_file_name,
850 content_pdf_file_name,
851 final_pdf_file_name,
852 )
854 # Copy PDF to MERSENNE_TEST_DATA_FOLDER
855 datastream = article.datastream_set.filter(mimetype="application/pdf").get()
856 to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)
857 # remove destination if exists to test if final pdf is really created
858 if os.path.exists(to_path): 858 ↛ 860line 858 didn't jump to line 860, because the condition on line 858 was never false
859 os.remove(to_path)
860 utils.linearize_pdf(final_pdf_file_name, to_path)
862 # if not replace_frontpage_only:
863 # # Add EXIF metadata in the final PDF (replace_front_page already does it)
864 # add_metadata(article, to_path, to_path)
def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Create the front page of articles by recompiling them on mersenne-tex.

    Flow (as described by the original author):
      - get the directory of the article sources: cedram_dev/production/ ..
      - add the publication date in the source TeX
      - remotely execute latexmk -pdf article.pdf
      - replace the PDF of the article on mersenne_test_data

    Behaviour visible in this function:
      - "PCJ": every article is rebuilt with create_tex_for_pcj + compile_tex
        and nothing else is done.
      - CRMATH/CRMECA/CRPHYS/CRGEOS/CRCHIM/CRBIOL with a container year
        before 2020 and no lang: nothing is done (no front page for the
        Elsevier CRAS).
      - otherwise compile_article is called for each article, but only when
        test is False and the article has a date_online_first or a
        date_published.

    colid: pid of the collection
    container: issue holding the articles (its year and pid are read)
    updated_articles: iterable of the articles to process
    test: when True, no article is compiled
    replace_frontpage_only, skip_compilation, lang: forwarded to compile_article

    raises Exception if an article has no ojs-id (needed to find its TeX folder)
    """

    # TODO refactor the code and only use compile_tex for all collections

    if colid == "PCJ":
        for article in updated_articles:
            lines = create_tex_for_pcj(article)
            compile_tex(lines, article, update=True)
        return

    # A missing (None) or non numeric year is treated as "unknown" (0)
    try:
        year = int(container.year)
    except (TypeError, ValueError):
        year = 0

    if (
        colid in ["CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL"]
        and year < 2020
        and lang == ""
    ):
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    # Nothing is modified in the database here, so there is nothing to roll back:
    # errors simply propagate to the caller
    # (a pdf.SAV could eventually be put back in place).
    for article in updated_articles:
        # article path
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # NOTE: description of the intended flow, kept from the original author.
        # - in test:
        #     date_pre_publish has been updated, but not the other dates.
        #     A temporary date like XXXX-XX-XX is only created for online_first
        #     (otherwise, when the online_first goes to production, the presence
        #     of the pdftimestamp file with XXXX-XX-XX would make that date appear)
        # - if article.my_container.with_online_first and there is no
        #   article.date_online_first:
        #     XXXX-xx-xx is used for online first
        #     (if the article already has an online-first date, it has presumably
        #     already been recompiled)
        # - when going to production, the dates of the article are used:
        #     if container.with_online_first:
        #         article.date_online_first goes in the right file
        #     if article.date_published: the matching file is updated

        if not test and (article.date_online_first or article.date_published):
            compile_article(
                article,
                colid,
                issue_id,
                article_path,
                article_tex_name,
                replace_frontpage_only,
                skip_compilation,
                lang,
            )
def create_translated_pdf(
    article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False
):
    """
    Build the PDF of a translated article on mersenne-tex and fetch it locally.

    Unless skip_compilation is set, the XML (xml_content) and the HTML file
    (html_file_name) are copied into the TeX folder of the article on
    mersenne-tex and bin/translate_article.sh is executed there.
    The remote "<ojs-id>-<lang>.pdf" is then copied to pdf_file_name.

    article: the article being translated (its ojs-id gives the TeX folder)
    xml_content: string written to "<ojs-id>.xml"
    lang: language code, used in the remote HTML/PDF file names
    pdf_file_name: local destination of the PDF
    html_file_name: local HTML file, copied as "trad-<lang>.html"
    skip_compilation: when True, nothing is uploaded nor compiled

    raises Exception if the article has no ojs-id
    """
    user = settings.MERSENNE_TEX_USER

    issue_path = resolver.get_cedram_issue_tex_folder(
        article.get_top_collection().pid, article.my_container.pid
    )
    article_tex_name = article.get_ojs_id()
    if not article_tex_name:
        raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
    article_path = os.path.join(issue_path, article_tex_name)

    xml_base_name = article_tex_name + ".xml"
    local_tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")
    local_xml = os.path.join(local_tmp_folder, xml_base_name)
    remote_xml = os.path.join(article_path, xml_base_name)

    # NOTE(review): the upload + compilation steps are assumed to be the whole body
    # of this "if", the final copy of the PDF being always executed - confirm.
    if not skip_compilation:
        # Make sure the local tmp folder exists before writing into it,
        # like the other functions of this file that write in LOG_DIR/tmp
        resolver.create_folder(local_tmp_folder)

        # Create the XML file locally
        with open(local_xml, "w", encoding="utf-8") as file_:
            file_.write(xml_content)

        # Copy XML file to mersenne-tex
        cmd = f"scp {local_xml} {user}@mathdoc-tex:{remote_xml}"
        utils.execute_cmd(cmd)

        remote_html_base_name = f"trad-{lang}.html"
        remote_html = os.path.join(article_path, remote_html_base_name)
        # Copy HTML file to mersenne-tex
        cmd = f"scp {html_file_name} {user}@mathdoc-tex:{remote_html}"
        utils.execute_cmd(cmd)

        # Create the PDF
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        # execute script to compile
        cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/translate_article.sh {article_path} {xml_base_name} {remote_html_base_name} {lang}"
        utils.execute_cmd(cmd)

    remote_pdf_base_name = f"{article_tex_name}-{lang}.pdf"
    remote_pdf = os.path.join(article_path, remote_pdf_base_name)
    # pdf-traduction should have created remote.pdf
    # Copy the PDF file
    cmd = f"scp {user}@mathdoc-tex:{remote_pdf} {pdf_file_name}"
    utils.execute_cmd(cmd)