diff --git a/.github/download_jars.py b/.github/download_jars.py
index f63da801e7e..8ca4917eab3 100644
--- a/.github/download_jars.py
+++ b/.github/download_jars.py
@@ -1,11 +1,13 @@
 import requests
 import random
 import os
+import json
 
 print('Job Starting')
 
 BASE_URL = "https://search.maven.org/solrsearch/select"
 DOWNLOAD_URL_TEMPLATE = "https://repo1.maven.org/maven2/{group}/{artifact}/{version}/{artifact}-{version}.jar"
+metadata_path = os.getenv('METADATA_PATH', 'metadata.json')
 OUTPUT_DIR = "downloaded_jars"
 NUM_JARS = 100
 MAX_SIZE_MB = 5 * 1024 * 1024  # 5MB in bytes
@@ -50,6 +52,46 @@ def get_random_artifact():
 
 
 downloaded_count = 0
+
+
+def get_metadata():
+    """Return the record of already-downloaded JARs stored at metadata_path.
+
+    Falls back to an empty record ({"jars": []}) when the file does not
+    exist, is not valid JSON, or lacks a "jars" list, so a first run or a
+    damaged file does not abort the job.
+
+    Raises:
+        IsADirectoryError: if metadata_path points at a directory.
+    """
+    if os.path.isdir(metadata_path):
+        raise IsADirectoryError(f"{metadata_path} is a directory, not a file.")
+    if not os.path.isfile(metadata_path):
+        return {"jars": []}
+    try:
+        with open(metadata_path, 'r', encoding='utf-8') as file:
+            data = json.load(file)
+    except json.JSONDecodeError as e:
+        print(f"Ignoring unreadable metadata in {metadata_path}: {e}")
+        return {"jars": []}
+    if not isinstance(data, dict) or not isinstance(data.get("jars"), list):
+        print(f"Ignoring malformed metadata in {metadata_path}")
+        return {"jars": []}
+    return data
+
+
+def save_metadata(data):
+    """Write data to metadata_path as JSON, creating parent directories."""
+    parent_dir = os.path.dirname(metadata_path)
+    if parent_dir:
+        os.makedirs(parent_dir, exist_ok=True)
+    with open(metadata_path, 'w', encoding='utf-8') as file:
+        json.dump(data, file, indent=4)
+
+
+metadata = get_metadata()
+# Names already recorded; a set keeps the per-artifact duplicate check O(1).
+known_jars = {jar['name'] for jar in metadata['jars']}
 # Download 100 random JARs
 while downloaded_count < NUM_JARS:
     artifact = get_random_artifact()
@@ -60,8 +102,17 @@ def get_random_artifact():
     version = artifact['latestVersion']
     download_url = construct_download_url(group, artifact_id, version)
     output_path = os.path.join(OUTPUT_DIR, f"{artifact_id}-{version}.jar")
+    # NOTE(review): the metadata key uses "--" while the file on disk uses "-",
+    # and neither includes the group; confirm both are intended.
+    artifact_name = f"{artifact_id}--{version}.jar"
+    if artifact_name in known_jars:
+        print(f"Skipped (already recorded): {output_path}")
+        continue
     try:
         if download_file(download_url, output_path):
+            metadata['jars'].append({'name': artifact_name})
+            known_jars.add(artifact_name)
+            save_metadata(metadata)
             print(f"Downloaded: {output_path}")
             downloaded_count += 1
         else:
diff --git a/.github/workflows/test-jars.yml b/.github/workflows/test-jars.yml
index ec32fdfb606..7ce5290253e 100644
--- a/.github/workflows/test-jars.yml
+++ b/.github/workflows/test-jars.yml
@@ -19,9 +19,30 @@ jobs:
         run:
           pip install requests
 
+      # NOTE(review): download-artifact@v3 appears to fetch only artifacts uploaded
+      # earlier in the same run; confirm metadata can really carry over between runs.
+      - name: Download Metadata
+        id: download-metadata
+        uses: actions/download-artifact@v3
+        with:
+          name: metadata
+          path: metadata
+        continue-on-error: true  # Allows workflow to continue even if the artifact metadata is not found (obviously it will not be found for the first run)
+
+      - name: Create empty metadata file
+        # continue-on-error hides the failure from failure(), so test the
+        # download step's own outcome instead.
+        if: steps.download-metadata.outcome == 'failure'
+        run: |
+          mkdir -p metadata
+          echo '{"jars":[]}' > metadata/metadata.json
+
       - name: Download random JARs
+        id: download
         run: |
           python .github/download_jars.py
+        env:
+          METADATA_PATH: metadata/metadata.json
 
       - name: Upload JARs
         uses: actions/upload-artifact@v3
@@ -29,6 +50,12 @@ jobs:
           name: jars
           path: downloaded_jars/
 
+      - name: Upload Metadata
+        uses: actions/upload-artifact@v3
+        with:
+          name: metadata
+          path: metadata/metadata.json
+
       - name: Setup Java
         uses: actions/setup-java@v3
         with:
@@ -48,11 +75,16 @@ jobs:
         run: |
           mvn clean install -DskipTests
 
+      - name: List directory contents
+        run: |
+          ls -l
+          ls -l ${{ github.workspace }}/downloaded_jars
+
       - name: Run tests on downloaded JARs
         run: |
           for jar in ${{ github.workspace }}/downloaded_jars/*.jar; do
             echo "Testing $jar"
-            mvn clean test -Dtest=sootup.java.bytecode.inputlocation.RandomJarTest -DjarPath="$jar" -pl sootup.java.bytecode
+            mvn test -Dtest=sootup.java.bytecode.inputlocation.RandomJarTest -DjarPath="$jar" -pl sootup.java.bytecode
           done
 
       - name: Upload the Artifact