Skip to content

Commit f852b16

Browse files
committed
feat: Support DBRX model in Llama
1 parent 5f16213 commit f852b16

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,29 @@ public enum HuggingFaceModel {
4343
WIZARD_CODER_PYTHON_13B_Q5(13, 5, "WizardCoder-Python-13B-V1.0-GGUF"),
4444
WIZARD_CODER_PYTHON_34B_Q3(34, 3, "WizardCoder-Python-34B-V1.0-GGUF"),
4545
WIZARD_CODER_PYTHON_34B_Q4(34, 4, "WizardCoder-Python-34B-V1.0-GGUF"),
46-
WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF");
46+
WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF"),
47+
48+
DBRX_12B_Q3_K_M(12, 3, "dbrx-16x12b-instruct-q3_k_m-gguf", "phymbert"),
49+
DBRX_12B_Q4_0(12, 4, "dbrx-16x12b-instruct-q4_0-gguf", "phymbert"),
50+
DBRX_12B_Q6_K(12, 6, "dbrx-16x12b-instruct-q6_k-gguf", "phymbert"),
51+
DBRX_12B_Q8_0(12, 8, "dbrx-16x12b-instruct-q8_0-gguf", "phymbert"),
52+
DBRX_12B_Q3_S(12, 3, "dbrx-16x12b-instruct-iq3_s-gguf", "phymbert"),
53+
DBRX_12B_Q3_XXS(12, 3, "dbrx-16x12b-instruct-iq3_xxs-gguf", "phymbert");
4754

4855
private final int parameterSize;
4956
private final int quantization;
5057
private final String modelName;
58+
private final String user;
5159

5260
/**
 * Convenience constructor for models hosted under the "TheBloke" account:
 * delegates to the four-argument constructor with {@code "TheBloke"} as the user.
 */
HuggingFaceModel(int parameterSize, int quantization, String modelName) {
61+
this(parameterSize, quantization, modelName, "TheBloke");
62+
}
63+
64+
/**
 * Stores the given values in the matching fields of this enum constant.
 *
 * @param parameterSize model size (presumably in billions of parameters; confirm against callers)
 * @param quantization quantization level (presumably the bit width; confirm against callers)
 * @param modelName repository name, used as a path segment of the huggingface.co URLs
 * @param user account name, used as the path segment before {@code modelName} in those URLs
 */
HuggingFaceModel(int parameterSize, int quantization, String modelName, String user) {
5365
this.parameterSize = parameterSize;
5466
this.quantization = quantization;
5567
this.modelName = modelName;
68+
this.user = user;
5669
}
5770

5871
public int getParameterSize() {
@@ -74,15 +87,15 @@ public String getFileName() {
7487
/**
 * Builds the URL of the model file:
 * {@code https://huggingface.co/<user>/<modelName>/resolve/main/<fileName>},
 * where the file name is taken from {@link #getFileName()}.
 *
 * @return the URL of the model file
 * @throws RuntimeException wrapping the {@link MalformedURLException} if the URL cannot be parsed
 */
public URL getFileURL() {
7588
try {
7689
return new URL(
77-
format("https://huggingface.co/TheBloke/%s/resolve/main/%s", modelName, getFileName()));
90+
"https://huggingface.co/%s/%s/resolve/main/%s".formatted(user, modelName, getFileName()));
7891
} catch (MalformedURLException ex) {
7992
throw new RuntimeException(ex);
8093
}
8194
}
8295

8396
public URL getHuggingFaceURL() {
8497
try {
85-
return new URL("https://huggingface.co/TheBloke/" + modelName);
98+
return new URL("https://huggingface.co/%s/%s".formatted(user, modelName));
8699
} catch (MalformedURLException ex) {
87100
throw new RuntimeException(ex);
88101
}

src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,21 @@ public enum LlamaModel {
8282
HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q5,
8383
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q3,
8484
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q4,
85-
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5));
85+
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5)),
86+
/**
 * DBRX entry: label "DBRX", a description assembled from concatenated string
 * fragments, the LLAMA prompt template, and the six DBRX HuggingFaceModel variants.
 */
DBRX(
87+
"DBRX",
88+
// Each fragment ends with a space so adjacent sentences do not run together when concatenated.
"DBRX is a Mixture-of-Experts (MoE) model with 132B total parameters and 36B live parameters. "
89+
+ "Generation speed is significantly faster than LLaMA2-70B, while at the same time "
90+
+ "beating other open source models, such as, LLaMA2-70B, Mixtral, and Grok-1 on "
91+
+ "language understanding, programming, math, and logic.",
92+
PromptTemplate.LLAMA,
93+
List.of(
94+
HuggingFaceModel.DBRX_12B_Q3_K_M,
95+
HuggingFaceModel.DBRX_12B_Q4_0,
96+
HuggingFaceModel.DBRX_12B_Q6_K,
97+
HuggingFaceModel.DBRX_12B_Q8_0,
98+
HuggingFaceModel.DBRX_12B_Q3_S,
99+
HuggingFaceModel.DBRX_12B_Q3_XXS));
86100

87101
private final String label;
88102
private final String description;

0 commit comments

Comments
 (0)