Skip to content

Commit 83e86fb

Browse files
committed
merge
Signed-off-by: Dave Lee <[email protected]>
2 parents 643db2d + 642f6ce commit 83e86fb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+922
-315
lines changed

.github/check_and_update.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import hashlib
2+
from huggingface_hub import hf_hub_download, get_paths_info
3+
import requests
4+
import sys
5+
import os
6+
7+
# The URI to check is the first command-line argument. sys.argv[0] is the
# path of this script itself, so reading it would hash the wrong target.
if len(sys.argv) < 2:
    print(f'Usage: {sys.argv[0]} <uri>', file=sys.stderr)
    sys.exit(1)
uri = sys.argv[1]
# The file name is whatever follows the last '/' of the URI.
file_name = uri.split('/')[-1]
9+
10+
# Function to parse the URI and determine download method
11+
def parse_uri(uri):
    """Classify a model URI and extract what is needed to fetch it.

    Args:
        uri: Either a ``huggingface://<repo>/<file>`` URI, a
            ``huggingface.co`` URL containing ``/resolve/``, or any other URL.

    Returns:
        A ``(download_type, target)`` tuple. ``download_type`` is
        ``'huggingface'`` with ``target`` set to the repository path, or
        ``'direct'`` with ``target`` set to the unmodified ``uri``.
    """
    if uri.startswith('huggingface://'):
        # Everything after the scheme is "<repo>/<file>"; drop the last
        # path segment to keep only the repository part.
        repo_and_file = uri.split('://', 1)[1]
        return 'huggingface', repo_and_file.rsplit('/', 1)[0]

    if 'huggingface.co' in uri:
        before_resolve, separator, _ = uri.partition('/resolve/')
        if separator:
            # Strip everything up to and including the host regardless of
            # the scheme, so http:// links are handled like https:// ones.
            return 'huggingface', before_resolve.split('huggingface.co/', 1)[-1]

    # Anything else (including huggingface.co URLs without '/resolve/')
    # is fetched as a plain URL.
    return 'direct', uri
21+
22+
def calculate_sha256(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal SHA-256 digest of a file.

    The file is read in fixed-size chunks so that it never has to be held
    in memory as a whole.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration. The digest does not
            depend on this value; it only trades memory for fewer reads.

    Returns:
        The digest as a lowercase hex string.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as f:
        while True:
            block = f.read(chunk_size)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
28+
29+
def manual_safety_check_hf(repo_id):
    """Query the Hugging Face scan endpoint for a model repository.

    Args:
        repo_id: Repository path appended to
            ``https://huggingface.co/api/models/``.

    Returns:
        The decoded scan report when its ``hasUnsafeFile`` field is truthy,
        otherwise ``None``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            endpoint answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the report has no ``hasUnsafeFile`` field.
    """
    scan_response = requests.get(
        'https://huggingface.co/api/models/' + repo_id + '/scan',
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    # Surface HTTP errors directly instead of failing later on an error
    # payload that lacks the expected field.
    scan_response.raise_for_status()
    scan = scan_response.json()
    # Deliberately strict lookup: a report without the field is treated as
    # an error rather than as "safe".
    if scan['hasUnsafeFile']:
        return scan
    return None
35+
36+
# Script body: resolve the URI, obtain the SHA-256 of the referenced file and
# print it on stdout. Diagnostics go to stderr. Exit codes used below:
#   1 - download failed with an unexpected HTTP status
#   2 - file not found / Hugging Face Hub error
#   5 - Hugging Face reported the repository as unsafe
download_type, repo_id_or_url = parse_uri(uri)

# Stays None until a checksum is taken from Hub metadata; if it is still
# None at the end, the file was downloaded and is hashed locally.
new_checksum = None

# Decide download method based on URI type
if download_type == 'huggingface':
    # Check if the repo is flagged as dangerous by HF
    hazard = manual_safety_check_hf(repo_id_or_url)
    if hazard is not None:
        # print() has no 'filename' keyword; the message belongs on stderr
        # so that stdout carries nothing but the checksum.
        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', file=sys.stderr)
        sys.exit(5)
    # Use HF API to pull sha
    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
        try:
            new_checksum = file.lfs.sha256
            break
        except Exception as e:
            # NOTE(review): any failure reading the LFS checksum exits with
            # code 2 - confirm this is intended for entries without LFS data.
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
    if new_checksum is None:
        # No checksum from metadata: download the file and hash it below.
        try:
            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
else:
    # NOTE: the whole response body is buffered in memory before it is
    # written to disk.
    response = requests.get(repo_id_or_url)
    if response.status_code == 200:
        with open(file_name, 'wb') as f:
            f.write(response.content)
        file_path = file_name
    elif response.status_code == 404:
        print(f'File not found: {response.status_code}', file=sys.stderr)
        sys.exit(2)
    else:
        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
        sys.exit(1)

if new_checksum is None:
    # file_path is always set on this path: both download branches either
    # assign it or exit.
    new_checksum = calculate_sha256(file_path)
    print(new_checksum)
    # Remove the downloaded copy once it has been hashed.
    os.remove(file_path)
else:
    print(new_checksum)

.github/checksum_checker.sh

Lines changed: 8 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -14,77 +14,14 @@ function check_and_update_checksum() {
1414
idx="$5"
1515

1616
# Download the file and calculate new checksum using Python
17-
new_checksum=$(python3 -c "
18-
import hashlib
19-
from huggingface_hub import hf_hub_download, get_paths_info
20-
import requests
21-
import sys
22-
import os
23-
24-
uri = '$uri'
25-
file_name = uri.split('/')[-1]
26-
27-
# Function to parse the URI and determine download method
28-
# Function to parse the URI and determine download method
29-
def parse_uri(uri):
30-
if uri.startswith('huggingface://'):
31-
repo_id = uri.split('://')[1]
32-
return 'huggingface', repo_id.rsplit('/', 1)[0]
33-
elif 'huggingface.co' in uri:
34-
parts = uri.split('/resolve/')
35-
if len(parts) > 1:
36-
repo_path = parts[0].split('https://huggingface.co/')[-1]
37-
return 'huggingface', repo_path
38-
return 'direct', uri
39-
40-
def calculate_sha256(file_path):
41-
sha256_hash = hashlib.sha256()
42-
with open(file_path, 'rb') as f:
43-
for byte_block in iter(lambda: f.read(4096), b''):
44-
sha256_hash.update(byte_block)
45-
return sha256_hash.hexdigest()
46-
47-
download_type, repo_id_or_url = parse_uri(uri)
48-
49-
new_checksum = None
50-
51-
# Decide download method based on URI type
52-
if download_type == 'huggingface':
53-
# Use HF API to pull sha
54-
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
55-
try:
56-
new_checksum = file.lfs.sha256
57-
break
58-
except Exception as e:
59-
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
60-
sys.exit(2)
61-
if new_checksum is None:
62-
try:
63-
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
64-
except Exception as e:
65-
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
66-
sys.exit(2)
67-
else:
68-
response = requests.get(repo_id_or_url)
69-
if response.status_code == 200:
70-
with open(file_name, 'wb') as f:
71-
f.write(response.content)
72-
file_path = file_name
73-
elif response.status_code == 404:
74-
print(f'File not found: {response.status_code}', file=sys.stderr)
75-
sys.exit(2)
76-
else:
77-
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
78-
sys.exit(1)
79-
80-
if new_checksum is None:
81-
new_checksum = calculate_sha256(file_path)
82-
print(new_checksum)
83-
os.remove(file_path)
84-
else:
85-
print(new_checksum)
17+
# Quote the URI so that spaces or glob characters ('?', '*', '[') in it are
# passed to the script as one unmodified argument.
new_checksum=$(python3 ./check_and_update.py "$uri")
# Capture the exit status immediately, before another command overwrites $?.
result=$?
8619

87-
")
20+
# Exit status 5 means check_and_update.py found the repository flagged as
# unsafe by Hugging Face: drop the whole entry at index $idx and stop.
# (A space is required before the closing ']]'.)
if [[ $result -eq 5 ]]; then
    echo "Contaminated entry detected, deleting entry for $model_name..."
    # '.[$idx]' selects the element at that index of the top-level array.
    yq eval -i "del(.[$idx])" "$input_yaml"
    return
fi
8825

8926
if [[ "$new_checksum" == "" ]]; then
9027
echo "Error calculating checksum for $file_name. Skipping..."
@@ -94,7 +31,7 @@ else:
9431
echo "Checksum for $file_name: $new_checksum"
9532

9633
# Compare and update the YAML file if checksums do not match
97-
result=$?
34+
9835
if [[ $result -eq 2 ]]; then
9936
echo "File not found, deleting entry for $file_name..."
10037
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"

.github/workflows/image-pr.yml

Lines changed: 92 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,16 @@ jobs:
3535
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
3636
matrix:
3737
include:
38-
- build-type: ''
39-
platforms: 'linux/amd64'
40-
tag-latest: 'false'
41-
tag-suffix: '-ffmpeg'
42-
ffmpeg: 'true'
43-
image-type: 'extras'
44-
runs-on: 'arc-runner-set'
45-
base-image: "ubuntu:22.04"
46-
makeflags: "--jobs=3 --output-sync=target"
38+
# This is basically covered by the AIO test
39+
# - build-type: ''
40+
# platforms: 'linux/amd64'
41+
# tag-latest: 'false'
42+
# tag-suffix: '-ffmpeg'
43+
# ffmpeg: 'true'
44+
# image-type: 'extras'
45+
# runs-on: 'arc-runner-set'
46+
# base-image: "ubuntu:22.04"
47+
# makeflags: "--jobs=3 --output-sync=target"
4748
- build-type: 'cublas'
4849
cuda-major-version: "12"
4950
cuda-minor-version: "4"
@@ -55,85 +56,85 @@ jobs:
5556
runs-on: 'arc-runner-set'
5657
base-image: "ubuntu:22.04"
5758
makeflags: "--jobs=3 --output-sync=target"
58-
- build-type: 'hipblas'
59-
platforms: 'linux/amd64'
60-
tag-latest: 'false'
61-
tag-suffix: '-hipblas'
62-
ffmpeg: 'false'
63-
image-type: 'extras'
64-
base-image: "rocm/dev-ubuntu-22.04:6.1"
65-
grpc-base-image: "ubuntu:22.04"
66-
runs-on: 'arc-runner-set'
67-
makeflags: "--jobs=3 --output-sync=target"
68-
- build-type: 'sycl_f16'
69-
platforms: 'linux/amd64'
70-
tag-latest: 'false'
71-
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
72-
grpc-base-image: "ubuntu:22.04"
73-
tag-suffix: 'sycl-f16-ffmpeg'
74-
ffmpeg: 'true'
75-
image-type: 'extras'
76-
runs-on: 'arc-runner-set'
77-
makeflags: "--jobs=3 --output-sync=target"
78-
core-image-build:
79-
uses: ./.github/workflows/image_build.yml
80-
with:
81-
tag-latest: ${{ matrix.tag-latest }}
82-
tag-suffix: ${{ matrix.tag-suffix }}
83-
ffmpeg: ${{ matrix.ffmpeg }}
84-
image-type: ${{ matrix.image-type }}
85-
build-type: ${{ matrix.build-type }}
86-
cuda-major-version: ${{ matrix.cuda-major-version }}
87-
cuda-minor-version: ${{ matrix.cuda-minor-version }}
88-
platforms: ${{ matrix.platforms }}
89-
runs-on: ${{ matrix.runs-on }}
90-
base-image: ${{ matrix.base-image }}
91-
grpc-base-image: ${{ matrix.grpc-base-image }}
92-
makeflags: ${{ matrix.makeflags }}
93-
secrets:
94-
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
95-
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
96-
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
97-
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
98-
strategy:
99-
matrix:
100-
include:
101-
- build-type: ''
102-
platforms: 'linux/amd64'
103-
tag-latest: 'false'
104-
tag-suffix: '-ffmpeg-core'
105-
ffmpeg: 'true'
106-
image-type: 'core'
107-
runs-on: 'ubuntu-latest'
108-
base-image: "ubuntu:22.04"
109-
makeflags: "--jobs=4 --output-sync=target"
110-
- build-type: 'sycl_f16'
111-
platforms: 'linux/amd64'
112-
tag-latest: 'false'
113-
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
114-
grpc-base-image: "ubuntu:22.04"
115-
tag-suffix: 'sycl-f16-ffmpeg-core'
116-
ffmpeg: 'true'
117-
image-type: 'core'
118-
runs-on: 'arc-runner-set'
119-
makeflags: "--jobs=3 --output-sync=target"
120-
- build-type: 'cublas'
121-
cuda-major-version: "12"
122-
cuda-minor-version: "4"
123-
platforms: 'linux/amd64'
124-
tag-latest: 'false'
125-
tag-suffix: '-cublas-cuda12-ffmpeg-core'
126-
ffmpeg: 'true'
127-
image-type: 'core'
128-
runs-on: 'ubuntu-latest'
129-
base-image: "ubuntu:22.04"
130-
makeflags: "--jobs=4 --output-sync=target"
131-
- build-type: 'vulkan'
132-
platforms: 'linux/amd64'
133-
tag-latest: 'false'
134-
tag-suffix: '-vulkan-ffmpeg-core'
135-
ffmpeg: 'true'
136-
image-type: 'core'
137-
runs-on: 'ubuntu-latest'
138-
base-image: "ubuntu:22.04"
139-
makeflags: "--jobs=4 --output-sync=target"
59+
# - build-type: 'hipblas'
60+
# platforms: 'linux/amd64'
61+
# tag-latest: 'false'
62+
# tag-suffix: '-hipblas'
63+
# ffmpeg: 'false'
64+
# image-type: 'extras'
65+
# base-image: "rocm/dev-ubuntu-22.04:6.1"
66+
# grpc-base-image: "ubuntu:22.04"
67+
# runs-on: 'arc-runner-set'
68+
# makeflags: "--jobs=3 --output-sync=target"
69+
# - build-type: 'sycl_f16'
70+
# platforms: 'linux/amd64'
71+
# tag-latest: 'false'
72+
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
73+
# grpc-base-image: "ubuntu:22.04"
74+
# tag-suffix: 'sycl-f16-ffmpeg'
75+
# ffmpeg: 'true'
76+
# image-type: 'extras'
77+
# runs-on: 'arc-runner-set'
78+
# makeflags: "--jobs=3 --output-sync=target"
79+
# core-image-build:
80+
# uses: ./.github/workflows/image_build.yml
81+
# with:
82+
# tag-latest: ${{ matrix.tag-latest }}
83+
# tag-suffix: ${{ matrix.tag-suffix }}
84+
# ffmpeg: ${{ matrix.ffmpeg }}
85+
# image-type: ${{ matrix.image-type }}
86+
# build-type: ${{ matrix.build-type }}
87+
# cuda-major-version: ${{ matrix.cuda-major-version }}
88+
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
89+
# platforms: ${{ matrix.platforms }}
90+
# runs-on: ${{ matrix.runs-on }}
91+
# base-image: ${{ matrix.base-image }}
92+
# grpc-base-image: ${{ matrix.grpc-base-image }}
93+
# makeflags: ${{ matrix.makeflags }}
94+
# secrets:
95+
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
96+
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
97+
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
98+
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
99+
# strategy:
100+
# matrix:
101+
# include:
102+
# - build-type: ''
103+
# platforms: 'linux/amd64'
104+
# tag-latest: 'false'
105+
# tag-suffix: '-ffmpeg-core'
106+
# ffmpeg: 'true'
107+
# image-type: 'core'
108+
# runs-on: 'ubuntu-latest'
109+
# base-image: "ubuntu:22.04"
110+
# makeflags: "--jobs=4 --output-sync=target"
111+
# - build-type: 'sycl_f16'
112+
# platforms: 'linux/amd64'
113+
# tag-latest: 'false'
114+
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
115+
# grpc-base-image: "ubuntu:22.04"
116+
# tag-suffix: 'sycl-f16-ffmpeg-core'
117+
# ffmpeg: 'true'
118+
# image-type: 'core'
119+
# runs-on: 'arc-runner-set'
120+
# makeflags: "--jobs=3 --output-sync=target"
121+
# - build-type: 'cublas'
122+
# cuda-major-version: "12"
123+
# cuda-minor-version: "4"
124+
# platforms: 'linux/amd64'
125+
# tag-latest: 'false'
126+
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
127+
# ffmpeg: 'true'
128+
# image-type: 'core'
129+
# runs-on: 'ubuntu-latest'
130+
# base-image: "ubuntu:22.04"
131+
# makeflags: "--jobs=4 --output-sync=target"
132+
# - build-type: 'vulkan'
133+
# platforms: 'linux/amd64'
134+
# tag-latest: 'false'
135+
# tag-suffix: '-vulkan-ffmpeg-core'
136+
# ffmpeg: 'true'
137+
# image-type: 'core'
138+
# runs-on: 'ubuntu-latest'
139+
# base-image: "ubuntu:22.04"
140+
# makeflags: "--jobs=4 --output-sync=target"

0 commit comments

Comments
 (0)