IEEE.org     |     IEEE Xplore Digital Library     |     IEEE Standards     |     IEEE Spectrum     |     More Sites

Verified commit 26704b2d, authored by Emi Simpson
Browse files

Make cleaner use a function, merge css additions

parent 284d931f
/* Modifications added automatically by the LaTeX cleaner */
/* Footnote reference links: blue, superscript, small, and not underlined */
.footnote-ref {
color: blue;
vertical-align: super;
font-size: 0.6rem;
text-decoration: none;
}
/* Footnote body: 0.7em, i.e. sized relative to the parent element's font size */
.footnote {
font-size: 0.7em;
}
......@@ -14,69 +14,68 @@ def find_tag(base: Tag, class_: str) -> Optional[Tag]:
else:
return assert_is_tag_after_find(result)
def clean(html: str) -> str:
    """Move inline LaTeX footnotes into a footnote section at the end of the page.

    The document is parsed with ``html.parser``. A new ``<section>`` is
    appended to the ``ltx_page_main`` element, and every ``ltx_note`` element
    is then replaced in place by an ``<a class="footnote-ref">`` link
    (``id="fn-ref-N"``, ``href="#fn-N"``). The note's content is moved into a
    ``<p class="footnote" id="fn-N">`` inside the new section, prefixed with a
    backlink to ``#fn-ref-N``.

    Args:
        html: Markup of the rendered LaTeX page.

    Returns:
        The modified document serialised back to a string.

    Raises:
        AssertionError: If the page has no ``ltx_page_main`` element, or a
            footnote lacks its ``ltx_note_content`` / ``ltx_note_mark``
            element, or the mark is empty.
        RuntimeError: If a footnote mark does not contain an integer.

    NOTE: the structural checks are ``assert`` statements, so they are skipped
    when Python runs with ``-O``.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Find the main element
    ltx_main = find_tag(soup, 'ltx_page_main')
    assert ltx_main is not None, (
        "Bad HTML: LaTeX page doesn't contain a main element "
        "(ltx_page_main). Are you sure this is a LaTeX html page "
        "rendered with tex2html?"
    )

    # Add a new section to contain the footnotes
    footnote_section = soup.new_tag('section')
    ltx_main.append(footnote_section)

    for footnote_wrapper in soup.find_all(class_='ltx_note'):
        footnote_wrapper = assert_is_tag_after_find(footnote_wrapper)

        # Find the content element (that's the box with the text that the
        # footnote refers to)
        note_content = find_tag(footnote_wrapper, 'ltx_note_content')
        assert note_content is not None, (
            'Bad HTML: Footnote exists without any content '
            'element (ltx_note_content)'
        )

        # Find the mark in the content and remove it. We'll replace it with
        # our own later
        note_mark = find_tag(note_content, class_='ltx_note_mark')
        assert note_mark is not None, (
            'Bad HTML: ltx_note (footnote) object without an '
            'ltx_note_mark (superscript number denoting the '
            'footnote number) present in the note content '
            '(ltx_note_content)'
        )
        note_mark = note_mark.extract()

        # Find the note number
        note_number_raw = note_mark.string
        assert note_number_raw is not None, (
            'Bad HTML: ltx_note_mark is present in a '
            'footnote, but is empty'
        )
        try:
            note_number = int(note_number_raw)
        except ValueError as err:
            # Chain the original error so the offending text stays visible
            # in the traceback.
            raise RuntimeError(
                'Bad HTML: ltx_note_mark is present in a footnote, but '
                'contains a non-numeric footnote number'
            ) from err

        # Replace the old footnote with just a reference to the new footnote
        new_reference = soup.new_tag(
            'a',
            href=f'#fn-{note_number}',
            id=f'fn-ref-{note_number}',
        )
        new_reference['class'] = 'footnote-ref'
        new_reference.string = str(note_number)
        footnote_wrapper.insert_before(new_reference)
        footnote_wrapper.extract()

        # Build the actual footnote the reference is pointing to
        new_content = soup.new_tag('p', id=f'fn-{note_number}')
        new_content['class'] = 'footnote'
        backlink = soup.new_tag(
            'a',
            href=f'#fn-ref-{note_number}',
        )
        backlink['class'] = 'footnote-ref'
        backlink.string = str(note_number)

        # Use Tag.append rather than mutating `.contents` directly: append
        # re-parents each node and keeps the tree's navigation links
        # consistent, whereas editing the list leaves the moved nodes still
        # pointing at their old parent.
        new_content.append(backlink)
        # Iterate over a snapshot because append() removes each child from
        # note_content.contents as it is moved.
        for child in list(note_content.contents):
            new_content.append(child)
        footnote_section.append(new_content)

    return str(soup)
......@@ -22,3 +22,14 @@ br {
/* Hide the element with id "p1" */
#p1 {
display: none;
}
/* Footnote code used for cleaner script */
/* Footnote reference links: blue, superscript, small, and not underlined */
.footnote-ref {
color: blue;
vertical-align: super;
font-size: 0.6rem;
text-decoration: none;
}
/* Footnote body: 0.7em, i.e. sized relative to the parent element's font size */
.footnote {
font-size: 0.7em;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment