Coverage for apps/ptf/cmds/xml/ckeditor/ckeditor

1##################################################################################################

3# README

5# ckeditor_parser.py parses the HTML strings created by a CKEditor

6# with tex formulas inside <span class="math-tex">

7# It returns the JATS equivalent.

9# Ex: <p>Te<st <span class="math-tex">$x = {-b \pm \sqrt{b^2-4ac} \over 2a}$</span> done</p>

10# <ul><li>Item</li></ul><ol><li>Item 1<br />New line</li><li> </li></ol>

11#

12##################################################################################################

if __name__ == "__main__":
    # Script mode only: append the directory five levels above this file to
    # sys.path (presumably so the absolute ``ptf`` imports below resolve).
    import os
    import sys

    BASE_DIR = os.path.abspath(__file__)
    for _ in range(5):
        BASE_DIR = os.path.dirname(BASE_DIR)
    sys.path.append(BASE_DIR)

25import os

27from lxml import etree

29from django.conf import settings

31from ptf.cmds.xml.xml_utils import escape

32from ptf.cmds.xml.xml_utils import normalize

33from ptf.cmds.xml.xml_utils import replace_html_entities

34from ptf.utils import create_innerlink_for_citation

37class CkeditorParser:

38 def __init__(self, *args, **kwargs):

39 self.warnings = []

40 self.value_xml = ""

41 self.value_html = ""

42 self.value_tex = ""

44 if "tree" not in kwargs and "html_value" in kwargs: 44 ↛ 56line 44 didn't jump to line 56, because the condition on line 44 was never false

45 parser = etree.XMLParser(

46 huge_tree=True,

47 recover=True,

48 remove_blank_text=False,

49 remove_comments=True,

50 resolve_entities=True,

51 )

52 html_value = kwargs["html_value"].replace("\n\n", "")

53 body = f"<body>{replace_html_entities(html_value)}</body>"

54 tree = etree.fromstring(body.encode("utf-8"), parser=parser)

55 else:

56 tree = kwargs["tree"]

58 self.mml_formulas = kwargs["mml_formulas"]

59 self.ignore_p = kwargs["ignore_p"] if "ignore_p" in kwargs else False

60 self.pid = kwargs.get("pid", None)

61 self.volume = kwargs.get("volume", None)

62 self.issue_pid = kwargs.get("issue_pid", None)

63 self.check_citation = kwargs.get("check_citation", False)

64 self.biblio = kwargs.get("biblio", None)

66 self.parse_tree(tree)

def parse_formula(self, node, **kwargs):
    """
    Convert a formula node into its HTML, TeX and JATS representations.

    :param node: element whose text is the TeX formula
    :param kwargs: ``display`` (optional) selects the "display" HTML class
        when no MathML is available
    :return: (html_text, tex_text, xml_text)
    """
    raw = node.text or ""
    if raw.startswith("\\("):
        # Drop the leading "\(" and the last two characters (the closing "\)").
        raw = raw[2:-2]

    # The MathML alternatives are consumed in document order.
    mml = self.mml_formulas.pop(0) if len(self.mml_formulas) > 0 else ""

    # NOTE(review): the test looks at parent.tail (the text after </p>), not at
    # node.tail (the text after the formula inside the paragraph) - confirm
    # that this is the intended "formula alone in its paragraph" check.
    parent = node.getparent()
    standalone = (
        parent is not None and parent.tag == "p" and not parent.text and not parent.tail
    )

    formula = f"${raw}$"
    # NOTE(review): the formula is inserted unescaped in the title attribute.
    if mml:
        html_text = f'<span class="mathjax-formula" title="{formula}">{mml}</span>'
    elif kwargs.get("display", None):
        html_text = f'<span class="mathjax-formula display" title="{formula}">{formula}</span>'
    else:
        html_text = f'<span class="mathjax-formula" title="{formula}">{formula}</span>'
    tex_text = formula

    mml_part = mml if len(mml) > 0 else ""
    tex_math = f"<tex-math>{escape(formula)}</tex-math>"

    if not standalone:
        xml_text = (
            "<inline-formula><alternatives>"
            + mml_part
            + tex_math
            + "</alternatives></inline-formula>"
        )
        return html_text, tex_text, xml_text

    # Standalone formulas are laid out in a one-row table with a label cell.
    prefix = '<table class="formula mathjax-formula"><tr><td class="formula-inner">'
    suffix = '</td><td class="formula-label"></td></tr></table>'
    html_text = prefix + html_text + suffix
    tex_text = prefix + tex_text + suffix
    xml_text = (
        '<disp-formula xml:space="preserve">\n<alternatives>'
        + mml_part
        + tex_math
        + "</alternatives></disp-formula>"
    )
    return html_text, tex_text, xml_text

112

def parse_list(self, node, **kwargs):
    """
    Shared implementation for <ul> and <ol>.

    The HTML/TeX outputs keep the original tag; the JATS output is a <list>
    whose list-type is "simple" for <ul> and "number" otherwise.

    :return: (html_text, tex_text, xml_text)
    """
    html_items, tex_items, jats_items = self.parse_node_inner(node, **kwargs)

    tag = node.tag
    jats_type = "number" if tag != "ul" else "simple"

    # JATS requires <list> to be inside <p>; this is not enforced here.
    return (
        f"<{tag}>{html_items}</{tag}>",
        f"<{tag}>{tex_items}</{tag}>",
        f'<list list-type="{jats_type}">{jats_items}</list>',
    )

132

def parse_node_inner(self, node, **kwargs):
    """
    Build the inner content of a node: its leading text followed by the
    output of each child (children are handled by parse_node_with_mixed_content).

    Used by the parse_node_with_<tag> methods, which add the wrapping tags.

    :param node: element to process
    :param kwargs: ``escape`` is read when the node has text; ``is_top`` is
        forced to False for the children
    :return: (inner_html_text, inner_tex_text, inner_jats_xml_text)
    """
    kwargs["is_top"] = False
    html_parts = []
    tex_parts = []
    jats_parts = []

    text = node.text
    if text:
        if text[0] == "\n" and node.tag in ("list", "item"):
            text = text[1:]

        # The JATS output is always escaped; HTML/TeX only when asked to.
        escaped = escape(text)
        visible = escaped if kwargs["escape"] else text
        jats_parts.append(escaped)
        tex_parts.append(visible)

        if self.check_citation and node.tag != "a":
            visible = create_innerlink_for_citation(visible, self.biblio)
        html_parts.append(visible)

    for child in node:
        child_html, child_tex, child_jats = self.parse_node_with_mixed_content(child, **kwargs)
        html_parts.append(child_html)
        tex_parts.append(child_tex)
        jats_parts.append(child_jats)

    return "".join(html_parts), "".join(tex_parts), "".join(jats_parts)

170

def parse_node_with_a(self, node, **kwargs):
    """
    Convert an <a> node: HTML/TeX keep an <a>, JATS gets an <ext-link>.

    When the node has no href, the inner TeX text is used as the target.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    href = ""
    for key, value in node.attrib.items():
        if normalize(key) == "href":
            href = value
    href = href or inner_tex

    # NOTE(review): href is inserted as is in the attributes below; confirm
    # whether it needs escaping (e.g. "&" in a query string) for the XML output.
    anchor_open = f'<a href="{href}">'
    html_text = anchor_open + f"{inner_html}</a>"
    tex_text = anchor_open + f"{inner_tex}</a>"
    xml_text = f'<ext-link ext-link-type="uri" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="{href}">{inner_jats}</ext-link>'

    return html_text, tex_text, xml_text

190

def parse_node_with_br(self, node, **kwargs):
    """
    Convert a line break: <br/> in HTML and TeX, <break/> in JATS.

    :return: (html_text, tex_text, xml_text)
    """
    line_break = "<br/>"
    return line_break, line_break, "<break/>"

196

def parse_node_with_colgroup(self, node, **kwargs):
    """
    Convert a <colgroup> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key: the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<colgroup class='{classe}'>{inner_html_text}</colgroup>"
    tex_text = f"<colgroup class='{classe}'>{inner_tex_text}</colgroup>"
    xml_text = '<colgroup xml:space="preserve">' + inner_jats_xml_text + "</colgroup>"
    return html_text, tex_text, xml_text

211

def parse_node_with_col(self, node, **kwargs):
    """
    Convert a <col> node, keeping its class (when set) and style in the
    HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    style = ""
    # Read values through the real attribute key; the normalized name is only
    # used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        name = normalize(attrib)
        if name == "class":
            classe = value
        elif name == "style":
            style = value

    # The class attribute is only emitted when set, and is quoted so that
    # multi-token values stay valid.
    class_attr = f"class='{classe}' " if classe else ""
    open_tag = f"<col {class_attr}style='{style}'>"
    html_text = f"{open_tag}{inner_html_text}</col>"
    tex_text = f"{open_tag}{inner_tex_text}</col>"

    xml_text = '<col xml:space="preserve">' + inner_jats_xml_text + "</col>"
    return html_text, tex_text, xml_text

233

def parse_node_with_div(self, node, **kwargs):
    """
    Convert a <div> node.

    The class comes from the ``class`` attribute or from ``data-custom-style``
    (style identification written by pandoc during a docx --> html conversion).
    A "PCJ-Section" div whose HTML content contains "References" and any
    "PCJ-Reference" div are dropped (three empty strings are returned).

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read values through the real attribute key; the normalized name is only
    # used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        name = normalize(attrib)
        if name == "class":
            classe = value
        elif name == "data-custom-style":
            if value == "PCJ Equation":
                classe = "mathjax-formula PCJ-Equation"
            else:
                classe = value.replace(" ", "-")

    is_references_title = classe == "PCJ-Section" and "References" in inner_html_text
    if is_references_title or classe == "PCJ-Reference":
        return "", "", ""

    html_text = f"<div class='{classe}'>{inner_html_text}</div>"
    tex_text = f"<div class='{classe}'>{inner_tex_text}</div>"
    xml_text = '<div xml:space="preserve">' + inner_jats_xml_text + "</div>"
    return html_text, tex_text, xml_text

262

def parse_node_with_em(self, node, **kwargs):
    """
    Convert an <em> node: a span with class "italique" in HTML, <i> in TeX
    and <italic> in JATS (self-closed when there is no content).

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    xml_text = f"<italic>{inner_jats}</italic>" if inner_jats else "<italic/>"
    return (
        f'<span class="italique">{inner_html}</span>',
        f"<i>{inner_tex}</i>",
        xml_text,
    )

277

def parse_node_with_h1(self, node, **kwargs):
    """
    Convert an <h1> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<h1 class='{classe}'>{inner_html_text}</h1>"
    tex_text = f"<h1 class='{classe}'>{inner_tex_text}</h1>"
    xml_text = '<h1 xml:space="preserve">' + inner_jats_xml_text + "</h1>"

    return html_text, tex_text, xml_text

293

def parse_node_with_h2(self, node, **kwargs):
    """
    Convert an <h2> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<h2 class='{classe}'>{inner_html_text}</h2>"
    tex_text = f"<h2 class='{classe}'>{inner_tex_text}</h2>"
    xml_text = '<h2 xml:space="preserve">' + inner_jats_xml_text + "</h2>"

    return html_text, tex_text, xml_text

309

def parse_node_with_h3(self, node, **kwargs):
    """
    Convert an <h3> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<h3 class='{classe}'>{inner_html_text}</h3>"
    tex_text = f"<h3 class='{classe}'>{inner_tex_text}</h3>"
    xml_text = '<h3 xml:space="preserve">' + inner_jats_xml_text + "</h3>"

    return html_text, tex_text, xml_text

325

def parse_node_with_h4(self, node, **kwargs):
    """
    Convert an <h4> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<h4 class='{classe}'>{inner_html_text}</h4>"
    tex_text = f"<h4 class='{classe}'>{inner_tex_text}</h4>"
    xml_text = '<h4 xml:space="preserve">' + inner_jats_xml_text + "</h4>"
    return html_text, tex_text, xml_text

340

def parse_node_with_h5(self, node, **kwargs):
    """
    Convert an <h5> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<h5 class='{classe}'>{inner_html_text}</h5>"
    tex_text = f"<h5 class='{classe}'>{inner_tex_text}</h5>"
    xml_text = '<h5 xml:space="preserve">' + inner_jats_xml_text + "</h5>"

    return html_text, tex_text, xml_text

356

def parse_node_with_h6(self, node, **kwargs):
    """
    Convert an <h6> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<h6 class='{classe}'>{inner_html_text}</h6>"
    tex_text = f"<h6 class='{classe}'>{inner_tex_text}</h6>"
    xml_text = '<h6 xml:space="preserve">' + inner_jats_xml_text + "</h6>"
    return html_text, tex_text, xml_text

371

def parse_node_with_img(self, node, **kwargs):
    """
    Convert an <img> node.

    The image URL is rebuilt from settings.SITE_URL_PREFIX (when defined),
    self.issue_pid, self.pid and the base name of the ``src`` attribute.
    The JATS output is a <graphic> element.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    prefix = getattr(settings, "SITE_URL_PREFIX", "")

    src = f"{prefix}/media/img/{self.issue_pid}/{self.pid}/src/media"
    # NOTE(review): href is never assigned, so the <graphic> xlink:href is
    # always empty - confirm whether it should carry the image location.
    href = ""
    classe = ""
    # Read values through the real attribute key; the normalized name is only
    # used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        name = normalize(attrib)
        if name == "src":
            img = os.path.basename(value)
            stem, ext = os.path.splitext(img)
            # If an image was converted to jpg, pandoc still wrote the HTML
            # with the previous extension ('.tiff' for example).
            if ext in (".tiff", ".tif"):
                img = stem + ".jpg"
            src = f"{src}/{img}"
        elif name == "style":
            classe = "article-body-img"
        elif name == "data-custom-style":
            classe = value.replace(" ", "-")

    # Attribute values are quoted so that empty values and URLs containing
    # spaces do not break the tag.
    html_text = f"<img src='{src}' class='{classe}'>{inner_html_text}</img>"
    # The TeX variant is built from the TeX inner text (it used to reuse the
    # HTML inner text).
    tex_text = f"<img src='{src}' class='{classe}'>{inner_tex_text}</img>"
    xml_text = f'<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="{href}">{inner_jats_xml_text}</graphic>'

    return html_text, tex_text, xml_text

407

def parse_node_with_li(self, node, **kwargs):
    """
    Convert an <li> node.

    Items whose parent is not a <ul> get the "article-list" class in the
    HTML/TeX outputs. The JATS output is a <list-item> wrapping a <p>.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    open_tag = "<li >" if node.getparent().tag == "ul" else "<li class='article-list'>"

    return (
        f"{open_tag}{inner_html}</li>",
        f"{open_tag}{inner_tex}</li>",
        f"<list-item><p>{inner_jats}</p></list-item>",
    )

423

def parse_node_with_mixed_content(self, node, **kwargs):
    """
    Parse a node that mixes text and sub-nodes and return its text in the
    three output formats, including the node's tail.
    Ex: <node>text1 <a>text_a</a> text2 <b>text_b</b>b_tail</node>

    The node is dispatched to the parse_node_with_<tag> method when one
    exists ("-" in the tag is replaced by "_"); otherwise only its inner
    content is kept (parse_node_inner).

    :param node: element to parse (None gives three empty strings)
    :param kwargs: ``is_top`` and ``escape`` both default to True
    :return: HTML text, TeX text, JATS XML text
    """
    if node is None:
        return "", "", ""

    kwargs.setdefault("is_top", True)
    # lxml replaces the entities in node.text and node.tail (like &lt;);
    # kwargs["escape"] tells whether the values have to be escaped back.
    kwargs.setdefault("escape", True)

    # I. The node itself: tag-specific handler, or the generic inner parser.
    handler = getattr(self, "parse_node_with_" + node.tag.replace("-", "_"), None)
    if not callable(handler):
        handler = self.parse_node_inner
    html_text, tex_text, jats_xml_text = handler(node, **kwargs)

    # II. The tail: the text that follows the end of the node.
    # Ex: <node>text1<a>text_a</a>a_tail</node>
    if node.tail:
        if self.check_citation and node.tag != "a":
            # The tail now holds generated links: do not escape it for HTML/TeX.
            node.tail = create_innerlink_for_citation(node.tail, self.biblio)
            kwargs["escape"] = False

        tail = node.tail
        escaped_tail = escape(tail)
        visible_tail = escaped_tail if kwargs["escape"] else tail
        html_text += visible_tail
        tex_text += visible_tail
        jats_xml_text += escaped_tail

    return html_text, tex_text, jats_xml_text

486

def parse_node_with_ol(self, node, **kwargs):
    """
    Convert an <ol> node; the work is done by parse_list.

    :return: (html_text, tex_text, xml_text)
    """
    # JATS requires <list> to be inside <p>; this is not enforced here.
    converted = self.parse_list(node, **kwargs)
    return converted

494

def parse_node_with_p(self, node, **kwargs):
    """
    Convert a <p> node.

    When self.ignore_p is set, the inner content is returned without any
    wrapper. Otherwise HTML/TeX get a <p> and JATS a <p xml:space="preserve">
    (self-closed when there is no content).

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    if self.ignore_p:
        return inner_html, inner_tex, inner_jats

    if inner_jats:
        xml_text = '<p xml:space="preserve">' + inner_jats + "</p>"
    else:
        xml_text = '<p xml:space="preserve"/>'

    return f"<p>{inner_html}</p>", f"<p>{inner_tex}</p>", xml_text

510

def parse_node_with_span(self, node, **kwargs):
    """
    Convert a <span> node.

    Spans with class "mathjax-formula", "math inline" or "math display" are
    formulas and are handled by parse_formula. Other spans are kept in the
    HTML/TeX outputs (with their class, if any) and reduced to their inner
    content in the JATS output.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    css_class = node.get("class")
    is_display = css_class == "math display"
    if css_class in ["math inline", "math display", "mathjax-formula"]:
        return self.parse_formula(node, display=is_display)

    open_tag = "<span>" if css_class is None else f'<span class="{css_class}">'
    return (
        f"{open_tag}{inner_html}</span>",
        f"{open_tag}{inner_tex}</span>",
        inner_jats,
    )

533

def parse_node_with_strong(self, node, **kwargs):
    """
    Convert a <strong> node: kept as is in HTML/TeX, <bold> in JATS
    (self-closed when there is no content).

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    xml_text = f"<bold>{inner_jats}</bold>" if inner_jats else "<bold/>"
    return f"<strong>{inner_html}</strong>", f"<strong>{inner_tex}</strong>", xml_text

548

def parse_node_with_sub(self, node, **kwargs):
    """
    Convert a <sub> node; the same tag is used in the three outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_texts = self.parse_node_inner(node, **kwargs)
    html_text, tex_text, xml_text = (f"<sub>{inner}</sub>" for inner in inner_texts)
    return html_text, tex_text, xml_text

559

def parse_node_with_sup(self, node, **kwargs):
    """
    Convert a <sup> node; the same tag is used in the three outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_texts = self.parse_node_inner(node, **kwargs)
    html_text, tex_text, xml_text = (f"<sup>{inner}</sup>" for inner in inner_texts)
    return html_text, tex_text, xml_text

570

def parse_node_with_table(self, node, **kwargs):
    """
    Convert a <table> node.

    The class comes from the ``class`` attribute or from ``data-custom-style``
    (style identification written by pandoc during a docx --> html conversion).
    When self.issue_pid contains "PCJ", the HTML/TeX table is wrapped in a
    <div class='PCJ-table'>.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read values through the real attribute key; the normalized name is only
    # used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        name = normalize(attrib)
        if name == "class":
            classe = value
        elif name == "data-custom-style":
            classe = value.replace(" ", "-")

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<table class='{classe}'>{inner_html_text}</table>"
    tex_text = f"<table class='{classe}'>{inner_tex_text}</table>"

    # issue_pid is None unless the caller provided one: guard the "in" test,
    # which would otherwise raise a TypeError.
    if self.issue_pid and "PCJ" in self.issue_pid:
        html_text = f"<div class='PCJ-table'>{html_text}</div>"
        tex_text = f"<div class='PCJ-table'>{tex_text}</div>"

    xml_text = '<table xml:space="preserve">' + inner_jats_xml_text + "</table>"
    return html_text, tex_text, xml_text

597

def parse_node_with_tbody(self, node, **kwargs):
    """
    Convert a <tbody> node, keeping its class in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    # Read the value through the real attribute key; the normalized name is
    # only used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        if normalize(attrib) == "class":
            classe = value

    # The class value is quoted so that empty or multi-token values stay valid.
    html_text = f"<tbody class='{classe}'>{inner_html_text}</tbody>"
    tex_text = f"<tbody class='{classe}'>{inner_tex_text}</tbody>"
    xml_text = '<tbody xml:space="preserve">' + inner_jats_xml_text + "</tbody>"
    return html_text, tex_text, xml_text

612

def parse_node_with_td(self, node, **kwargs):
    """
    Convert a <td> node, keeping its class (when set), rowspan and colspan
    in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    rowspan = ""
    colspan = ""
    # Read values through the real attribute key; the normalized name is only
    # used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        name = normalize(attrib)
        if name == "class":
            classe = value
        elif name == "rowspan":
            rowspan = value
        elif name == "colspan":
            colspan = value

    # The class attribute is only emitted when set, and is quoted so that
    # multi-token values stay valid.
    class_attr = f"class='{classe}' " if classe else ""
    open_tag = f"<td {class_attr}rowspan='{rowspan}' colspan='{colspan}'>"
    html_text = f"{open_tag}{inner_html_text}</td>"
    tex_text = f"{open_tag}{inner_tex_text}</td>"

    xml_text = '<td xml:space="preserve">' + inner_jats_xml_text + "</td>"
    return html_text, tex_text, xml_text

639

def parse_node_with_th(self, node, **kwargs):
    """
    Convert a <th> node, keeping its class (when set), rowspan and colspan
    in the HTML/TeX outputs.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html_text, inner_tex_text, inner_jats_xml_text = self.parse_node_inner(
        node, **kwargs
    )

    classe = ""
    rowspan = ""
    colspan = ""
    # Read values through the real attribute key; the normalized name is only
    # used for the comparison (it may differ from the key).
    for attrib, value in node.attrib.items():
        name = normalize(attrib)
        if name == "class":
            classe = value
        elif name == "rowspan":
            rowspan = value
        elif name == "colspan":
            colspan = value

    # The class attribute is only emitted when set, and is quoted so that
    # multi-token values stay valid.
    class_attr = f"class='{classe}' " if classe else ""
    open_tag = f"<th {class_attr}rowspan='{rowspan}' colspan='{colspan}'>"
    html_text = f"{open_tag}{inner_html_text}</th>"
    tex_text = f"{open_tag}{inner_tex_text}</th>"

    xml_text = '<th xml:space="preserve">' + inner_jats_xml_text + "</th>"
    return html_text, tex_text, xml_text

666

def parse_node_with_tr(self, node, **kwargs):
    """
    Convert a <tr> node. The HTML/TeX rows always carry an empty class.

    :return: (html_text, tex_text, xml_text)
    """
    inner_html, inner_tex, inner_jats = self.parse_node_inner(node, **kwargs)

    row_class = ""
    open_tag = f"<tr class='{row_class}'>"

    return (
        open_tag + inner_html + "</tr>",
        open_tag + inner_tex + "</tr>",
        '<tr xml:space="preserve">' + inner_jats + "</tr>",
    )

678

def parse_node_with_ul(self, node, **kwargs):
    """
    Convert a <ul> node; the work is done by parse_list.

    :return: (html_text, tex_text, xml_text)
    """
    converted = self.parse_list(node, **kwargs)
    return converted

681

def parse_tree(self, tree):
    """
    Parse the whole tree and store the results in value_html, value_tex
    and value_xml.

    :param tree: root element, handed to parse_node_with_mixed_content
    """
    html_text, tex_text, xml_text = self.parse_node_with_mixed_content(tree, is_top=True)
    self.value_html = html_text
    self.value_tex = tex_text
    self.value_xml = xml_text

686

687

if __name__ == "__main__":
    # Manual smoke test: convert a small CKEditor fragment and print its JATS.
    html_value = r'<p>Te<st <span class="mathjax-formula">$x = {-b \pm \sqrt{b^2-4ac} \over 2a}$</span> done</p><ul><li>Item</li></ul><ol><li>Item 1<br />New line</li><li> </li></ol>'
    # The constructor reads kwargs["mml_formulas"]; pass an empty list (no
    # MathML alternatives) so that the demo does not fail with a KeyError.
    parser = CkeditorParser(html_value=html_value, mml_formulas=[])
    result = parser.value_xml
    print(result)

Coverage for apps/ptf/cmds/xml/ckeditor/ckeditor_parser.py: 45%

426 statements