제목에 따라 항목을 넣는 방법은 무엇입니까?

Question 1

# Presence matrix: one output row per input record, one column per distinct
# item found in fields 2..NF anywhere in the input.
#
# Usage: awk -f script.awk file file
# The same file is given twice: the first read gathers the column names,
# the second read produces the table.

BEGIN {
    OFS = "\t"
}

# First read (NR and FNR still agree): remember every item as an array key.
# Referencing an element is enough to create it, so repeats are harmless.
NR == FNR {
    for (f = 2; f <= NF; f++)
        columns[$f]
    next
}

# Start of a later read: emit the header row.
# Column order is whatever order awk's "for (key in array)" traversal yields.
FNR == 1 {
    header = "Items"
    for (name in columns)
        header = header OFS name
    print header
}

# Every record of a later read: the row label followed by one flag per column.
{
    row = $1
    for (name in columns)
        row = row OFS has_item(name)
    print row
}

# Return 1 when any of fields 2..NF of the current record equals name,
# otherwise 0. "f" is a local variable (extra-parameter idiom).
function has_item(name,    f) {
    for (f = 2; f <= NF; f++)
        if ($f == name)
            return 1
    return 0
}

데이터에서 이를 실행합니다(빈 줄을 제거한 후).

$ awk -f script.awk file file
Items   Acou#1  Bla#4   Bla#5   Elfa#2  Agly#3  Flq#2
Item_A: 1       0       1       0       0       0
Item_B: 1       0       0       1       0       1
Item_C: 1       1       0       1       0       1
Item_D: 0       1       0       1       1       0

스크립트가 데이터를 두 번 스캔하기 때문에 입력 데이터 파일을 두 번 지정해야 합니다. 첫 번째 스캔(`FNR == NR` 블록)에서는 각 줄의 데이터 항목을 수집합니다. 두 번째 스캔에서는 수집된 각 데이터 항목(헤더)을 각 줄의 데이터와 비교하여 테스트합니다.

출력에서 `0`은 헤더의 항목이 해당 행의 데이터에 존재하지 않음을, `1`은 존재함을 나타냅니다. 이것은 요청하신 것과 *정확히* 일치하지는 않으므로...

다음은 헤더를 `#`에서 자르고, `#` 뒤의 부분을 표시할 데이터로 사용하는 변형입니다:

# Value matrix: items have the form NAME#VALUE. One output row per input
# record, one column per distinct NAME; each cell shows the VALUE found for
# that NAME on the row, or 0 when the row has no such NAME.
#
# Usage: awk -f script.awk file file
# The same file is given twice: the first read gathers the column names,
# the second read produces the table.

BEGIN {
    OFS = "\t"
}

# First read (NR and FNR still agree): remember the part before "#" of every
# item as an array key. Referencing an element is enough to create it.
NR == FNR {
    for (f = 2; f <= NF; f++) {
        split($f, part, "#")
        columns[part[1]]
    }
    next
}

# Start of a later read: emit the header row.
# Column order is whatever order awk's "for (key in array)" traversal yields.
FNR == 1 {
    header = "Items"
    for (name in columns)
        header = header OFS name
    print header
}

# Every record of a later read: the row label followed by one cell per column.
{
    row = $1
    for (name in columns)
        row = row OFS value_of(name)
    print row
}

# Look through fields 2..NF of the current record for items whose part before
# "#" equals name and return the part after "#". The search stops at the first
# match whose value is true (non-zero / non-empty); a match with a false value
# is kept but may be replaced by a later match. Returns 0 when nothing matches.
# "f", "part" and "val" are local variables (extra-parameter idiom).
function value_of(name,    f, part, val) {
    val = 0
    for (f = 2; f <= NF; f++) {
        split($f, part, "#")
        if (part[1] == name) {
            val = part[2]
            if (val)
                break
        }
    }
    return val
}

실행하기:

$ awk -f script.awk file file
Items   Elfa    Bla     Acou    Agly    Flq
Item_A: 0       5       1       0       0
Item_B: 2       0       1       0       2
Item_C: 2       4       1       0       2
Item_D: 2       4       0       3       0

열의 순서는 정렬되어 있다는 보장이 없습니다(헤더가 연관 배열의 키로 저장되기 때문입니다). 열을 정렬하는 것은 독자를 위한 연습 문제로 남겨 둡니다.

Answer

# Presence matrix: one output row per input record, one column per distinct
# item found in fields 2..NF anywhere in the input.
#
# Usage: awk -f script.awk file file
# The same file is given twice: the first read gathers the column names,
# the second read produces the table.

BEGIN {
    OFS = "\t"
}

# First read (NR and FNR still agree): remember every item as an array key.
# Referencing an element is enough to create it, so repeats are harmless.
NR == FNR {
    for (f = 2; f <= NF; f++)
        columns[$f]
    next
}

# Start of a later read: emit the header row.
# Column order is whatever order awk's "for (key in array)" traversal yields.
FNR == 1 {
    header = "Items"
    for (name in columns)
        header = header OFS name
    print header
}

# Every record of a later read: the row label followed by one flag per column.
{
    row = $1
    for (name in columns)
        row = row OFS has_item(name)
    print row
}

# Return 1 when any of fields 2..NF of the current record equals name,
# otherwise 0. "f" is a local variable (extra-parameter idiom).
function has_item(name,    f) {
    for (f = 2; f <= NF; f++)
        if ($f == name)
            return 1
    return 0
}