get-chromium-toolchain-strings.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211

#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
# This script extracts the revision and sub-revision from the update.py and update_rust.py files in the Chromium source code.
# The revision and sub-revision are used to identify the version of Clang and Rust used in the Chromium toolchain.

import json
import requests
import sys

def get_testfonts(url) -> str:
    """
    Reads the DEPS file (gclient) and extracts the testfonts SHA which is used
    as the object name (SHA256 as of 2024)
    deps['src/third_party/test_fonts']['objects'][0]['object_name']

    Args:
        url (str): The URL of the DEPS file on GitHub's raw endpoint.

    Returns:
        str: The SHA256 of the testfonts, or None if not found.
        """

    # We're not properly parsing the DEPS file, but it's 'close enough' to JSON that
    # we can throw away the preamble and do some remediation to read the values in.

    testfonts = None
    response = requests.get(url)
    if response.status_code == 200:
        text = response.content.decode('utf-8')
        lines = text.splitlines()
        # throw away everything up to `deps = {`
        # We'll add our own opening brace to make it valid JSON
        start = 0
        for idx, line in enumerate(lines):
            if line.startswith("deps = {"):
                start = idx + 1
                break

        # throw away everything after the variable ends `}`
        length = 0
        for idx, line in enumerate(lines):
            if idx < start:
                continue
            if line.startswith("}"):
                length = idx
                break

        deps: list[str] = ['{', '}']
        deps[1:1] = lines[start:length]

        # remove any comments, because JSON doesn't like them
        deps = [line for line in deps if not line.strip().startswith('#')]

        # I hate to do this, but we need to remediate the JSON - single quotes to double quotes ho!
        deps = [line.replace("'", '"') for line in deps]
        # the `condition` variable is always a python comparison. Let's not even try to parse it.
        # we don't care so just drop the whole line
        deps = [line for line in deps if "condition" not in line]
        # ditto `Var()`
        deps = [line for line in deps if "Var(" not in line]
        # if a line ends in ' +' it's a python thing and we probably already stripped whatever is being
        # concatenated, so we can just remove the '+' and append a ','.
        deps = [line.replace(" +", ",") if line.endswith(" +") else line for line in deps]
        # strip ' "@",' from any lines... No idea what gclient does with this
        deps = [line.replace(' "@",', "") for line in deps]


        # If we encounter '[{' or '}]' we should expand them onto individual lines.
        # for '[{', remove the { and add it on a new line, for '}]' remove the ] and add it on a new line.
        # every instance so far has been '}],' so let's assume that holds true?
        newdeps = []
        for line in deps:
            if '[{' in line:
                # 'blah: [', '{'
                newdeps.append(line[:-1])
                newdeps.append('{')
            elif '}]' in line:
                # '},', '],'
                newdeps.append(line[:-2])
                newdeps.append('],')
            else:
                newdeps.append(line)

        deps = newdeps

        # if the last thing in an object has a trailing comma, it's invalid JSON so we need to remove it,
        # probably easiest to do if we check that the next line is '}' when stripped and remediate that
        newdeps = []
        for idx, line in enumerate(deps):
            if line.endswith(",") and deps[idx + 1].strip() == "}":
                newdeps.append(line.replace(",", ""))
            elif line.endswith(",") and deps[idx + 1].strip() == "},":
                newdeps.append(line.replace(",", ""))
            else:
                newdeps.append(line)

        deps = newdeps
        newdeps = []

        for idx, line in enumerate(deps):
            if line.endswith("},") and deps[idx + 1].strip() == "]":
                newdeps.append(line.replace(",", ""))
            elif line.endswith("},") and deps[idx + 1].strip() == "],":
                newdeps.append(line.replace(",", ""))
            else:
                newdeps.append(line)

        deps = newdeps

        # If the line does not contain a colon _and_ the previous and next lines contain '{' and '}' respectively,
        # it's very likely a naked sha and json can't parse it. We can just strip it.
        newdeps = []
        for idx, line in enumerate(deps):
            if ":" not in line and "{" in deps[idx - 1] and '}' in deps[idx + 1]:
                continue
            else:
                newdeps.append(line)

        deps = newdeps

        # final blacklist; not worth writing a rule for this
        bad_lines = [
            '+ "@" + "42e892d96e47b1f6e29844cc705e148ec4856448", # release 1.9.4',
        ]
        deps = [line for line in deps if line.strip() not in bad_lines]

        # Clean up any keys with no values. Always do this last
        newdeps = []
        for idx, line in enumerate(deps):
            if line.endswith(":") and deps[idx + 1].strip() == "":
                continue
            else:
                newdeps.append(line)

        deps = newdeps

        # debug_lines = range(1460, 1500)
        # for idx, line in enumerate(deps):
        #     if idx in debug_lines:
        #         print(f"{idx}: {line}")

        # Now we have a list of strings that should be valid JSON
        # We can join them and load them
        deps = json.loads('\n'.join(deps))
        # Now we can get the testfonts SHA
        return deps['src/third_party/test_fonts/test_fonts']['objects'][0]['object_name']
    else:
        raise ValueError(f"Failed to get revision info. Status code: {response.status_code}")

    return testfonts


def get_revision_info(url) -> str:
    """
    Extracts revision and sub-revision from a Chromium source file URL.

    Args:
        url (str): The URL of the source file on GitHub's raw endpoint.

    Returns:
        tuple: A tuple containing the revision (str) and sub-revision (int)
               or (None, None) if not found.
    """
    response = requests.get(url)
    if response.status_code == 200:
        text = response.content.decode('utf-8')  # Decode to UTF-8
        lines = text.splitlines()
        revision = None
        sub_revision = None
        for line in lines:
            if line.startswith("CLANG_REVISION") and not line.startswith("PACKAGE_VERSION"):
                revision = line.split("=")[1].strip().strip("'")
            elif line.startswith("CLANG_SUB_REVISION") and not line.startswith("PACKAGE_VERSION"):
                sub_revision = int(line.split("=")[1].strip())
            elif line.startswith("RUST_REVISION") and not line.startswith("specieid") and not line.startswith("    return"):
                # I know that's spelt wrong, but apparently google cant't spell
                revision = line.split("=")[1].strip().strip("'")
            elif line.startswith("RUST_SUB_REVISION") and not line.startswith("specieid") and not line.startswith("    return"):
                sub_revision = int(line.split("=")[1].strip()[-1])
        if revision is None or sub_revision is None:
            raise ValueError("Failed to extract revision and sub-revision")
        return revision, sub_revision
    else:
        raise ValueError(f"Failed to get revision info. Status code: {response.status_code}")


def main():
    version = sys.argv[1] if len(sys.argv) > 1 else "128.0.6613.113"
    # It's a lot easier to use GH raw URLs for this
    base_url = "https://raw.githubusercontent.com/chromium/chromium/"
    clang_url = f"{base_url}{version}/tools/clang/scripts/update.py"
    rust_url = f"{base_url}{version}/tools/rust/update_rust.py"
    deps_url = f"{base_url}{version}/DEPS"
    clang_revision, clang_sub_revision = get_revision_info(clang_url)
    rust_revision, rust_sub_revision = get_revision_info(rust_url)
    testfonts = get_testfonts(deps_url)
    if clang_revision and clang_sub_revision:
        print(f"clang revision: {clang_revision}-{clang_sub_revision}")
    else:
        print("clang revision not found")
    if rust_revision and rust_sub_revision:
        print(f"rust revision: {rust_revision}-{rust_sub_revision}")
    else:
        print("rust revision not found")
    if testfonts:
        print(f"test fonts: {testfonts}")
    else:
        print("test fonts not found")

if __name__ == "__main__":
    main()