hwdb: PNP/ACPI lists on uefi.org are now in CSV format
authorLuca Boccassi <bluca@debian.org>
Tue, 14 Nov 2023 20:46:12 +0000 (20:46 +0000)
committerLuca Boccassi <bluca@debian.org>
Tue, 14 Nov 2023 21:07:08 +0000 (21:07 +0000)
Adjust the parsing as it's no longer HTML files. Some IDs end with
whitespace, without being quoted, which seems like a mistake as they
weren't before, so strip the ID columns before applying them.

hwdb.d/acpi-update.py

index e669d0d21f1be0319dbd9006f6bf2e91b75d2750..f65147c91d4d93e37e632564777915d1fd08e1cc 100755 (executable)
@@ -1,74 +1,26 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: LGPL-2.1-or-later
 
-from html.parser import HTMLParser
+from csv import reader
 from enum import Enum
 
-class State(Enum):
-    NOWHERE = 0
-    COMPANY = 1
-    AFTER_COMPANY = 2
-    PNPID = 3
-    AFTER_PNPID = 4
-    DATE = 5
-
-class PNPTableParser(HTMLParser):
-
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.state = State.NOWHERE
-        self.data = ""
-        self.pnpid = None
-        self.company = None
-        self.table = []
-
-    def handle_starttag(self, tag, attrs):
-
-        if tag == "td":
-            if self.state == State.NOWHERE:
-                self.state = State.COMPANY
-            elif self.state == State.AFTER_COMPANY:
-                self.state = State.PNPID
-            elif self.state == State.AFTER_PNPID:
-                self.state = State.DATE
-            else:
-                raise ValueError
-
-            self.data = ""
-
-    def handle_endtag(self, tag):
-
-        if tag == "td":
-            if self.state == State.COMPANY:
-                self.company = ' '.join(self.data.strip().split())
-                self.state = State.AFTER_COMPANY
-            elif self.state == State.PNPID:
-                self.pnpid = self.data.strip()
-                self.state = State.AFTER_PNPID
-                self.table.append((self.pnpid, self.company))
-            elif self.state == State.DATE:
-                self.state = State.NOWHERE
-            else:
-                raise ValueError
-
-    def handle_data(self, data):
-        self.data += data
-
 def read_table(a):
 
-    parser = PNPTableParser()
-
-    for line in a:
-        parser.feed(line)
+    table = []
 
-    parser.close()
-    parser.table.sort()
+    with open(a, newline='') as csvfile:
+        for row in reader(csvfile):
+            if row[0] == "Company":
+                # Skip header
+                continue
+            table.append(row)
 
-    for pnpid, company in parser.table:
-        print("\nacpi:{0}*:\n ID_VENDOR_FROM_DATABASE={1}".format(pnpid, company))
+    table.sort(key=lambda x: x[1])
 
-a = open("acpi_id_registry.html")
-b = open("pnp_id_registry.html")
+    for row in table:
+        # Some IDs end with whitespace, while they didn't in the old HTML table, so it's probably
+        # a mistake, strip it.
+        print("\nacpi:{0}*:\n ID_VENDOR_FROM_DATABASE={1}".format(row[1].strip(), row[0].strip()))
 
 print('# This file is part of systemd.\n'
       '#\n'
@@ -76,5 +28,5 @@ print('# This file is part of systemd.\n'
       '#     https://uefi.org/uefi-pnp-export\n'
       '#     https://uefi.org/uefi-acpi-export')
 
-read_table(a)
-read_table(b)
+read_table("acpi_id_registry.html")
+read_table("pnp_id_registry.html")