diff --git a/scripts/version_scanner/README.md b/scripts/version_scanner/README.md index 1978e42f55cc..4868087de1ec 100644 --- a/scripts/version_scanner/README.md +++ b/scripts/version_scanner/README.md @@ -17,7 +17,7 @@ python3 scripts/version_scanner/version_scanner.py -d -v * `-p`, `--path`: Root directory to scan (defaults to current directory) * `--package`: Specific subdirectory filter (useful for monorepos) * `--package-file`: Path to a file containing a list of package directories to scan (e.g., `scripts/version_scanner/small_package_list.txt`) -* `--config`: Path to the regex configuration file (defaults to scripts/version_scanner/regex_config.yaml) +* `--config`: Path to the regex configuration file (defaults to scripts/version_scanner/regex_pattern_config.yaml) * `-o`, `--output`: Path to the output CSV file (defaults to --.csv) * `--github-repo`: GitHub repository URL base (defaults to https://github.com/googleapis/google-cloud-python) * `--branch`: GitHub branch for links (defaults to main) @@ -43,7 +43,7 @@ pip install -r scripts/version_scanner/requirements.txt ## Configuration -The scanner uses a YAML configuration file (`regex_config.yaml`) to define rules and regex patterns. +The scanner uses a YAML configuration file (`regex_pattern_config.yaml`) to define rules and regex patterns. ## Ignoring Directories diff --git a/scripts/version_scanner/regex_config.yaml b/scripts/version_scanner/regex_pattern_config.yaml similarity index 53% rename from scripts/version_scanner/regex_config.yaml rename to scripts/version_scanner/regex_pattern_config.yaml index c88696016bfc..5a2e72654392 100644 --- a/scripts/version_scanner/regex_config.yaml +++ b/scripts/version_scanner/regex_pattern_config.yaml @@ -1,15 +1,9 @@ description: Search rules for identifying dependency versions + rules: - - name: explicit_version_string - description: Finds explicit version strings in code or configs. 
- examples: - "'3.7'" - '"3.7.1"' - "'3.7.12'" - "Python 3.7" - rules: - - | - \b{major}\.{minor}(\.\d+)?\b + # ========================================== + # 🐍 PYTHON RUNTIME SPECIFIC RULES + # ========================================== - name: python_requires description: Finds various forms of python_requires declarations. @@ -123,16 +117,119 @@ rules: - | Python{major}{minor}(?!\d) + # ========================================== + # 📦 GENERIC RULES (WILL APPLY TO RUNTIMES OR PACKAGES) + # ========================================== + + - name: explicit_version_string + description: Finds explicit version strings in code or configs (this may result in a lot of noise/false positives but can also catch edge cases that more specific rules do not). + examples: + - "'3.7'" + - '"3.7.1"' + - "'3.7.12'" + - "Python 3.7" + rules: + - | + \b{major}\.{minor}(\.\d+)?\b + - name: dependency_requirement - description: Finds standard dependency requirement formats (e.g., protobuf==3.7). + description: Finds standard dependency requirements (e.g., protobuf==3.7, pandas>=1.0). examples: - "protobuf==3.7" - - "protobuf>=3.7" - - "protobuf<=3.7" - - "protobuf~=3.7" - - "protobuf!=3.7" + - "protobuf >=3.7" + - "protobuf<= 3.7" + - "protobuf == 3.7" rules: - | {name}\s*(?:==|>=|<=|~=|!=)\s*{version}(?!\d) + - name: dependency_version_constant + description: Finds assignments to custom version constants (e.g., PROTOBUF_VERSION, PANDAS_VERSION). + examples: + - "PROTOBUF_VERSION = '3.7'" + - "PROTOBUF_VERSION= '3.7'" + - "PROTOBUF_VERSION ='3.7'" + - "PROTOBUF_VERSION='3.7'" + - 'PROTOBUF_VERSION = "3.7"' + rules: + - | + {name}_VERSION\s*=\s*['"]?{version}['"]? + + - name: dependency_version_constant_membership + description: Finds checks that test a version constant for membership in a collection.
+ examples: + - 'PROTOBUF_VERSION[0:2] in ["3.", "4."]' + - 'PROTOBUF_VERSION[0:2] in {"3.", "4."}' + - 'PROTOBUF_VERSION[0:2] in ("3.", "4.")' + rules: + - | + {name}_VERSION(?:\[.*?\])?\s+in\s+[\(\[\{{].*?['"]?(?= "3.*"' + rules: + - | + {name}\s*(?:==|>=|<=|~=|!=)\s*['"]?{major}\.[x\*]['"]? + + - name: dependency_wildcard_generic + description: Finds wildcard version specifications in comments or documentation. + examples: + - "protobuf 3.x" + - "protobuf 3.*" + rules: + - | + {name}\s+(?:version\s+)?{major}\.[x\*](?!\w) + + - name: dependency_flexible_version + description: Finds versions with varying levels of specificity with accompanying qualifier. + examples: + - "protobuf 3." + - "protobuf 3.7" + - "protobuf 3.7." + - "protobuf 3.7.5" + rules: + - | + {name}\s+{major}\.(?!\d) + - | + {name}\s+{major}\.{minor}(?!\d) + - | + {name}\s+{major}\.{minor}\.(?!\d) + - | + {name}\s+{major}\.{minor}\.{patch}(?!\d) + + - name: generic_dependency_check_call + description: Finds generic function or class calls checking dependency and version. + examples: + - 'DependencyConstraint("google.protobuf", minimum_fully_supported_version="3.7")' + - 'CheckDep("protobuf", "3.7")' + rules: + - | + \b[A-Za-z_]\w*\s*\([^)]*['"](?:google\.)?{name}['"][^)]*{version}[^)]*\) + + - name: dependency_introspection + description: Finds standard package introspection methods and libraries. + examples: + - "protobuf.__version__" + - "importlib.metadata.version('protobuf')" + - "pkg_resources.get_distribution('protobuf')" + - "packaging.version # protobuf" + - "dist.metadata['Name'] == 'protobuf'" + rules: + - | + {name}\.__version__ + - | + importlib\.metadata\.version\s*\(\s*['"]{name}['"]\s*\) + - | + pkg_resources\.get_distribution\s*\(\s*['"]{name}['"]\s*\) + - | + packaging\.version(?=.*{name}) + - | + dist\.metadata\s*\[\s*['"]Name['"]\s*\]\s*==\s*['"]{name}['"] + # ========================================== + # 🎯 PACKAGE SPECIFIC RULES (I.E. 
SPECIFIC TO PROTOBUF, PANDAS, ETC) + # ========================================== + # Currently empty. diff --git a/scripts/version_scanner/tests/integration/test_scanner_integration.py b/scripts/version_scanner/tests/integration/test_scanner_integration.py index 3ce6d2cd3ab1..9baf927c6fa7 100644 --- a/scripts/version_scanner/tests/integration/test_scanner_integration.py +++ b/scripts/version_scanner/tests/integration/test_scanner_integration.py @@ -20,7 +20,7 @@ def test_integration_scan(tmp_path): # Paths to real tools scanner_path = os.path.abspath("version_scanner.py") - config_path = os.path.abspath("regex_config.yaml") + config_path = os.path.abspath("regex_pattern_config.yaml") # Static data directory data_dir = os.path.abspath("tests/data") diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index 054df22421bc..5e6d1ed0611b 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -454,7 +454,7 @@ def test_upload_to_drive(mock_auth, mock_build): def test_regex_examples_from_config(): """Test that examples in config match at least one rule in the group.""" - config_path = "regex_config.yaml" + config_path = "regex_pattern_config.yaml" try: with open(config_path, 'r') as f: @@ -505,7 +505,7 @@ def test_regex_examples_from_config(): def test_regex_negative_cases(): """Verify regex patterns prevent false positives (lookaheads, patch bounds) and support whitespace.""" - config_path = "regex_config.yaml" + config_path = "regex_pattern_config.yaml" with open(config_path, 'r') as f: config = yaml.safe_load(f) @@ -646,7 +646,7 @@ def test_scan_file_truncation_bug(tmp_path): from version_scanner import ConfigManager, scan_file # Init config for 3.1 - config_manager = ConfigManager("regex_config.yaml", "python", "3.1") + config_manager = ConfigManager("regex_pattern_config.yaml", "python", "3.1") rules = 
config_manager.load_config() import re compiled_rules = [{"name": r["name"], "pattern": re.compile(r["pattern"], re.IGNORECASE)} for r in rules] @@ -828,7 +828,7 @@ def test_scan_repository_multi_targets(tmp_path): file2.write_text("protobuf==4.25.8\n") # Let's mock a config file with rules for both python and protobuf - config_file = tmp_path / "regex_config.yaml" + config_file = tmp_path / "regex_pattern_config.yaml" config_file.write_text(""" rules: - name: python_requires_check diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index 6205e8effadd..128105d21372 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -658,7 +658,7 @@ def parse_targets_file(file_path: str) -> List[Tuple[str, str]]: def main(): script_dir = os.path.dirname(os.path.abspath(__file__)) - default_config = os.path.join(script_dir, "regex_config.yaml") + default_config = os.path.join(script_dir, "regex_pattern_config.yaml") parser = argparse.ArgumentParser( description="Scan repository for references to specific dependency versions." @@ -702,7 +702,7 @@ def main(): parser.add_argument( "--config", default=default_config, - help="Path to the regex configuration file (defaults to scripts/version_scanner/regex_config.yaml)" + help="Path to the regex configuration file (defaults to scripts/version_scanner/regex_pattern_config.yaml)" ) parser.add_argument(