Löschen Sie mit Notepad++ den gesamten Code innerhalb eines Tags in mehreren HTML-Dateien

Question

Nun, unten ist ein schnell zusammengewürfelter Code, der für ein einfaches Datenbeispiel ganz gut zu funktionieren scheint. Machen Sie damit, was Sie wollen.

Ja, das ist nichts, was Sie mit einer einfachen Suchen-und-Ersetzen-Funktion im Editor erledigen können, selbst mit den in F&R in N++ verfügbaren regulären Ausdrücken ist das nicht wirklich möglich … Oder selbst wenn es in regulären Ausdrücken möglich ist – das übersteigt mein Niveau bei weitem. ;)

import sys
import re

def get_tag():
    """Read the rest of a tag from standard input.

    Characters are consumed one at a time until a ``>`` is read or the input
    ends. Whenever a single or double quote is read, the quoted text that
    follows is consumed via ``get_string`` so that a ``>`` inside it does not
    terminate the tag.

    Returns:
        The text that was read, including the terminating ``>`` when one was
        found. If the input ends first, ``"Unexpected EOF"`` is written to
        standard error and the text read so far is returned.
    """
    pieces = []
    # iter() with a sentinel stops as soon as read(1) returns the empty string.
    for ch in iter(lambda: sys.stdin.read(1), ""):
        pieces.append(ch)
        if ch in ('"', "'"):
            pieces.append(get_string(ch))
        elif ch == '>':
            return "".join(pieces)
    sys.stderr.write("Unexpected EOF\n")
    return "".join(pieces)

def get_string(quote='"'):
    """Read the remainder of a quoted string from standard input.

    The opening quote is expected to have been consumed by the caller.
    Characters are read one at a time up to and including the first
    occurrence of ``quote`` that is not directly preceded by a backslash.

    Args:
        quote: The quote character that terminates the string.

    Returns:
        The text that was read, including the closing quote. If the input
        ends before a closing quote is found, ``"Unexpected EOF"`` is written
        to standard error and the text read so far is returned.
    """
    chars = []
    previous = ""
    while True:
        c = sys.stdin.read(1)
        if not c:
            sys.stderr.write("Unexpected EOF\n")
            break
        chars.append(c)
        # Track the previous character explicitly rather than indexing
        # buffer[-2]: that lookup raised IndexError when the closing quote was
        # the very first character read (an empty value such as alt="").
        # NOTE(review): HTML itself has no backslash escapes; the backslash
        # check is kept as the original author wrote it.
        if c == quote and previous != '\\':
            break
        previous = c
    return "".join(chars)


# NOTE: despite the "ul_" prefix, all three patterns match <li> tags.

# Any opening <li> tag, with or without attributes.
ul_begin = re.compile(r"<\s*li(?:>|\s+.*>)", re.IGNORECASE | re.DOTALL)

# An opening <li> tag whose class attribute contains the word "share".
# - "(?:.*\s)?class" requires whitespace directly before "class", so
#   attributes such as data-class="share" do not count.
# - "(?:(?!\1).)" matches any character except the opening quote. (A
#   backreference cannot be used inside a character class: "[^\1]" means
#   "anything but \x01", which let the match run past the closing quote and
#   pick up "share" from a later attribute value.)
# Limitation: text that looks like class="share" inside another attribute's
# value can still match; this is a regex heuristic, not an HTML parser.
ul_begin_share = re.compile(
    r"<\s*li\s+(?:.*\s)?class\s*=\s*([\"'])"
    r"(?:(?:(?!\1).)*?\s)?share(?:\s(?:(?!\1).)*?)?\1.*?>",
    re.IGNORECASE | re.DOTALL,
)

# A closing </li> tag.
ul_end = re.compile(r"</\s*li\s*>", re.IGNORECASE)


def main():
    """Copy standard input to standard output, dropping "share" list items.

    Every <li> element whose class attribute contains the word "share" is
    removed together with everything nested inside it, up to and including
    its matching </li>. All other input is written through unchanged.
    """
    # Number of currently open <li> elements inside an item being deleted;
    # 0 means output is enabled.
    skip_depth = 0
    while True:
        c = sys.stdin.read(1)
        if not c:
            break

        if c == '<':
            tag = c + get_tag()

            if skip_depth > 0:
                # Inside a deleted item: only track nesting, emit nothing.
                if ul_begin.match(tag):
                    skip_depth += 1
                elif ul_end.match(tag):
                    skip_depth -= 1
                continue

            if ul_begin_share.match(tag):
                skip_depth = 1
                continue

            # Any other tag (including a stray </li>) is passed through whole.
            c = tag

        if skip_depth == 0:
            sys.stdout.write(c)


if __name__ == "__main__":
    main()

Testdaten in data.html:

<ul>
    <li>do not touch that</li>
    <li id="whatever1">or that</li>
    <li class="share">delete this</li>
    <li class="foo-bar share">delete this</li>
    <li class="foobar share foo-bar_">delete this</li>
    <li class='share'>delete this</li>
    <li class='"wtf" share'>delete this</li>
    <li class=" share ">delete this</li>
    <li class="  share  ">delete this</li>
    <li class="foo share">delete this</li>
    <li class="share bar">delete this</li>
    <li class="foo share bar">delete this</li>
    <li class="long foo share short bar">delete this</li>
    <li class=" share ">delete this</li>
    <li class=" foo share bar ">delete this</li>
    <!-- but leave <li class="share">this comment</li> alone -->
    <li>This will stay</li>
    <li class="share">
        <li>delete this</li>
        <li>delete this</li>
    </li>
    <li style="not !important" class="share">delete this</li>
    <li>leave this, but
        <li class="share">
            <li>delete this</li>
            <li>delete this</li>
            <li>delete this</li>
            <li>delete this</li>
        </li>
    </li>
    <li class=" foo share bar ">delete this</li>
    <li class="shared">Can't touch this, naaaa-nanana...</li>
</ul>
<em>blablabla</em>

Beispiellauf:

$ python test.py < data.html > data.corrected.html
$ cat data.corrected.html
<ul>
    <li>do not touch that</li>
    <li id="whatever1">or that</li>













    <!-- but leave <li class="share">this comment</li> alone -->
    <li>This will stay</li>


    <li>leave this, but

    </li>

    <li class="shared">Can't touch this, naaaa-nanana...</li>
</ul>
<em>blablabla</em>

Answer 1

Nun, unten ist ein schnell zusammengewürfelter Code, der für ein einfaches Datenbeispiel ganz gut zu funktionieren scheint. Machen Sie damit, was Sie wollen.

Ja, das ist nichts, was Sie mit einer einfachen Suchen-und-Ersetzen-Funktion im Editor erledigen können, selbst mit den in F&R in N++ verfügbaren regulären Ausdrücken ist das nicht wirklich möglich … Oder selbst wenn es in regulären Ausdrücken möglich ist – das übersteigt mein Niveau bei weitem. ;)

import sys
import re

def get_tag():
    """Read the rest of a tag from standard input.

    Characters are consumed one at a time until a ``>`` is read or the input
    ends. Whenever a single or double quote is read, the quoted text that
    follows is consumed via ``get_string`` so that a ``>`` inside it does not
    terminate the tag.

    Returns:
        The text that was read, including the terminating ``>`` when one was
        found. If the input ends first, ``"Unexpected EOF"`` is written to
        standard error and the text read so far is returned.
    """
    pieces = []
    # iter() with a sentinel stops as soon as read(1) returns the empty string.
    for ch in iter(lambda: sys.stdin.read(1), ""):
        pieces.append(ch)
        if ch in ('"', "'"):
            pieces.append(get_string(ch))
        elif ch == '>':
            return "".join(pieces)
    sys.stderr.write("Unexpected EOF\n")
    return "".join(pieces)

def get_string(quote='"'):
    """Read the remainder of a quoted string from standard input.

    The opening quote is expected to have been consumed by the caller.
    Characters are read one at a time up to and including the first
    occurrence of ``quote`` that is not directly preceded by a backslash.

    Args:
        quote: The quote character that terminates the string.

    Returns:
        The text that was read, including the closing quote. If the input
        ends before a closing quote is found, ``"Unexpected EOF"`` is written
        to standard error and the text read so far is returned.
    """
    chars = []
    previous = ""
    while True:
        c = sys.stdin.read(1)
        if not c:
            sys.stderr.write("Unexpected EOF\n")
            break
        chars.append(c)
        # Track the previous character explicitly rather than indexing
        # buffer[-2]: that lookup raised IndexError when the closing quote was
        # the very first character read (an empty value such as alt="").
        # NOTE(review): HTML itself has no backslash escapes; the backslash
        # check is kept as the original author wrote it.
        if c == quote and previous != '\\':
            break
        previous = c
    return "".join(chars)


# NOTE: despite the "ul_" prefix, all three patterns match <li> tags.

# Any opening <li> tag, with or without attributes.
ul_begin = re.compile(r"<\s*li(?:>|\s+.*>)", re.IGNORECASE | re.DOTALL)

# An opening <li> tag whose class attribute contains the word "share".
# - "(?:.*\s)?class" requires whitespace directly before "class", so
#   attributes such as data-class="share" do not count.
# - "(?:(?!\1).)" matches any character except the opening quote. (A
#   backreference cannot be used inside a character class: "[^\1]" means
#   "anything but \x01", which let the match run past the closing quote and
#   pick up "share" from a later attribute value.)
# Limitation: text that looks like class="share" inside another attribute's
# value can still match; this is a regex heuristic, not an HTML parser.
ul_begin_share = re.compile(
    r"<\s*li\s+(?:.*\s)?class\s*=\s*([\"'])"
    r"(?:(?:(?!\1).)*?\s)?share(?:\s(?:(?!\1).)*?)?\1.*?>",
    re.IGNORECASE | re.DOTALL,
)

# A closing </li> tag.
ul_end = re.compile(r"</\s*li\s*>", re.IGNORECASE)


def main():
    """Copy standard input to standard output, dropping "share" list items.

    Every <li> element whose class attribute contains the word "share" is
    removed together with everything nested inside it, up to and including
    its matching </li>. All other input is written through unchanged.
    """
    # Number of currently open <li> elements inside an item being deleted;
    # 0 means output is enabled.
    skip_depth = 0
    while True:
        c = sys.stdin.read(1)
        if not c:
            break

        if c == '<':
            tag = c + get_tag()

            if skip_depth > 0:
                # Inside a deleted item: only track nesting, emit nothing.
                if ul_begin.match(tag):
                    skip_depth += 1
                elif ul_end.match(tag):
                    skip_depth -= 1
                continue

            if ul_begin_share.match(tag):
                skip_depth = 1
                continue

            # Any other tag (including a stray </li>) is passed through whole.
            c = tag

        if skip_depth == 0:
            sys.stdout.write(c)


if __name__ == "__main__":
    main()

Testdaten in data.html:

<ul>
    <li>do not touch that</li>
    <li id="whatever1">or that</li>
    <li class="share">delete this</li>
    <li class="foo-bar share">delete this</li>
    <li class="foobar share foo-bar_">delete this</li>
    <li class='share'>delete this</li>
    <li class='"wtf" share'>delete this</li>
    <li class=" share ">delete this</li>
    <li class="  share  ">delete this</li>
    <li class="foo share">delete this</li>
    <li class="share bar">delete this</li>
    <li class="foo share bar">delete this</li>
    <li class="long foo share short bar">delete this</li>
    <li class=" share ">delete this</li>
    <li class=" foo share bar ">delete this</li>
    <!-- but leave <li class="share">this comment</li> alone -->
    <li>This will stay</li>
    <li class="share">
        <li>delete this</li>
        <li>delete this</li>
    </li>
    <li style="not !important" class="share">delete this</li>
    <li>leave this, but
        <li class="share">
            <li>delete this</li>
            <li>delete this</li>
            <li>delete this</li>
            <li>delete this</li>
        </li>
    </li>
    <li class=" foo share bar ">delete this</li>
    <li class="shared">Can't touch this, naaaa-nanana...</li>
</ul>
<em>blablabla</em>

Beispiellauf:

$ python test.py < data.html > data.corrected.html
$ cat data.corrected.html
<ul>
    <li>do not touch that</li>
    <li id="whatever1">or that</li>













    <!-- but leave <li class="share">this comment</li> alone -->
    <li>This will stay</li>


    <li>leave this, but

    </li>

    <li class="shared">Can't touch this, naaaa-nanana...</li>
</ul>
<em>blablabla</em>

Löschen Sie mit Notepad++ den gesamten Code innerhalb eines Tags in mehreren HTML-Dateien

Antwort1

verwandte Informationen