diff --git a/sagemaker-serve/debug.ipynb b/sagemaker-serve/debug.ipynb new file mode 100644 index 0000000000..1b01455d26 --- /dev/null +++ b/sagemaker-serve/debug.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2026-01-06T19:04:00.359722Z", + "start_time": "2026-01-06T19:03:57.427847Z" + } + }, + "source": [ + "from sagemaker.core import ScriptProcessor, image_uris\n", + "base_job_prefix = \"debug-1\"\n", + "\n", + "sklearn_processor = ScriptProcessor(\n", + " image_uri=image_uris.retrieve(\n", + " framework=\"sklearn\",\n", + " region=\"us-east-1\",\n", + " version=\"1.2-1\",\n", + " py_version=\"py3\",\n", + " instance_type=\"ml.m5.xlarge\",\n", + " ),\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=1,\n", + " base_job_name=f\"{base_job_prefix}-sklearn-preprocess-job-tags\",\n", + " tags= [{'Key': 'project', 'Value': 'tags-testing'}],\n", + " role= \"arn:aws:iam::211125564141:role/Admin\"\n", + ")" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "A module that was compiled using NumPy 1.x cannot be run in\n", + "NumPy 2.2.6 as it may crash. To support both 1.x and 2.x\n", + "versions of NumPy, modules must be compiled with NumPy 2.0.\n", + "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", + "\n", + "If you are a user of the module, the easiest solution will be to\n", + "downgrade to 'numpy<2' or try to upgrade the affected module.\n", + "We expect that some modules will need time to support NumPy 2.\n", + "\n", + "Traceback (most recent call last): File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", + " return _run_code(code, main_globals, None,\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/runpy.py\", line 86, in _run_code\n", + " exec(code, run_globals)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel_launcher.py\", line 18, in \n", + " app.launch_new_instance()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n", + " app.start()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelapp.py\", line 739, in start\n", + " self.io_loop.start()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/tornado/platform/asyncio.py\", line 205, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", + " self._run_once()\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/asyncio/base_events.py\", line 1909, in _run_once\n", + " handle._run()\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/asyncio/events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n", + " await self.process_one()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n", + " await dispatch(*args)\n", + " File 
\"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n", + " await result\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n", + " await super().execute_request(stream, ident, parent)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n", + " reply_content = await reply_content\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n", + " res = shell.run_cell(\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3077, in run_cell\n", + " result = self._run_cell(\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3132, in _run_cell\n", + " result = runner(coro)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3336, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3519, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3579, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"/var/folders/vx/jl46pbwx02q9fjkt411r2nz80000gr/T/ipykernel_65663/1919898306.py\", line 1, in \n", + " from sagemaker.core import ScriptProcessor, image_uris\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/__init__.py\", line 8, in \n", + " from sagemaker.core.processing import ( # noqa: F401\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/processing.py\", line 55, in \n", + " from sagemaker.core.resources import ProcessingJob\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/resources.py\", line 46, in \n", + " from sagemaker.core.serializers.base import BaseSerializer\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/serializers/__init__.py\", line 6, in \n", + " from sagemaker.core.serializers.base import * # noqa: F401, F403\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/serializers/base.py\", line 22, in \n", + " from pandas import DataFrame\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pandas/__init__.py\", line 26, in \n", + " from pandas.compat import (\n", + " File 
\"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pandas/compat/__init__.py\", line 29, in \n", + " from pandas.compat.pyarrow import (\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pandas/compat/pyarrow.py\", line 8, in \n", + " import pyarrow as pa\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pyarrow/__init__.py\", line 65, in \n", + " import pyarrow.lib as _lib\n" + ] + }, + { + "data": { + "text/plain": [ + "\u001B[1;91mAttributeError: \u001B[0m_ARRAY_API not found\n" + ], + "text/html": [ + "
AttributeError: _ARRAY_API not found\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "A module that was compiled using NumPy 1.x cannot be run in\n", + "NumPy 2.2.6 as it may crash. To support both 1.x and 2.x\n", + "versions of NumPy, modules must be compiled with NumPy 2.0.\n", + "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", + "\n", + "If you are a user of the module, the easiest solution will be to\n", + "downgrade to 'numpy<2' or try to upgrade the affected module.\n", + "We expect that some modules will need time to support NumPy 2.\n", + "\n", + "Traceback (most recent call last): File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", + " return _run_code(code, main_globals, None,\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/runpy.py\", line 86, in _run_code\n", + " exec(code, run_globals)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel_launcher.py\", line 18, in \n", + " app.launch_new_instance()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n", + " app.start()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelapp.py\", line 739, in start\n", + " self.io_loop.start()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/tornado/platform/asyncio.py\", line 205, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", + " self._run_once()\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/asyncio/base_events.py\", line 1909, in _run_once\n", + " handle._run()\n", + " File \"/Users/nargokul/.pyenv/versions/3.10.13/lib/python3.10/asyncio/events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n", + " await self.process_one()\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n", + " await dispatch(*args)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n", + " await result\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n", + " await super().execute_request(stream, ident, parent)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n", + " reply_content = await reply_content\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n", + " res = shell.run_cell(\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File 
\"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3077, in run_cell\n", + " result = self._run_cell(\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3132, in _run_cell\n", + " result = runner(coro)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3336, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3519, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3579, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"/var/folders/vx/jl46pbwx02q9fjkt411r2nz80000gr/T/ipykernel_65663/1919898306.py\", line 1, in \n", + " from sagemaker.core import ScriptProcessor, image_uris\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/__init__.py\", line 8, in \n", + " from sagemaker.core.processing import ( # noqa: F401\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/processing.py\", line 55, in \n", + " from sagemaker.core.resources import ProcessingJob\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/resources.py\", line 46, in \n", + " from sagemaker.core.serializers.base import BaseSerializer\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/serializers/__init__.py\", line 6, in \n", + " from sagemaker.core.serializers.base import * # noqa: F401, F403\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/serializers/base.py\", line 22, in \n", + " from pandas import DataFrame\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pandas/__init__.py\", line 49, in \n", + " from pandas.core.api import (\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pandas/core/api.py\", line 9, in \n", + " from pandas.core.dtypes.dtypes import (\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py\", line 24, in \n", + " from pandas._libs import (\n", + " File \"/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/pyarrow/__init__.py\", line 65, in \n", + " import pyarrow.lib as _lib\n" + ] + }, + { + "data": { + "text/plain": [ + "\u001B[1;91mAttributeError: \u001B[0m_ARRAY_API not found\n" + ], + "text/html": [ + "
AttributeError: _ARRAY_API not found\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m[01/06/26 11:03:59]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=873625;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=411522;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
[01/06/26 11:03:59] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", + "sagemaker.config INFO - Not applying SDK defaults from location: /Users/nargokul/Library/Application Support/sagemaker/config.yaml\n" + ] + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m[01/06/26 11:04:00]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=302222;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=801428;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
[01/06/26 11:04:00] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 1 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-06T19:07:02.565262Z", + "start_time": "2026-01-06T19:04:05.639311Z" + } + }, + "cell_type": "code", + "source": [ + "sklearn_processor.run(\n", + " code=\"test_script.py\"\n", + ")" + ], + "id": "edc750dd7422da9b", + "outputs": [ + { + "data": { + "text/plain": [ + "\u001B[2;36m[01/06/26 11:04:07]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Creating processing-job with name \u001B]8;id=41315;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/processing.py\u001B\\\u001B[2mprocessing.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=629262;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/processing.py#609\u001B\\\u001B[2m609\u001B[0m\u001B]8;;\u001B\\\n", + "\u001B[2;36m \u001B[0m debug-\u001B[1;36m1\u001B[0m-sklearn-preprocess-job-tags-\u001B[1;36m2026\u001B[0m-01-06-19-04-05-641 \u001B[2m \u001B[0m\n" + ], + "text/html": [ + "
[01/06/26 11:04:07] INFO     Creating processing-job with name                                    processing.py:609\n",
+       "                             debug-1-sklearn-preprocess-job-tags-2026-01-06-19-04-05-641                           \n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m[01/06/26 11:04:08]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=26615;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=424189;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
[01/06/26 11:04:08] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/rich/live.py:231: UserWarning: \n", + "install \"ipywidgets\" for Jupyter support\n", + " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" + ], + "text/html": [ + "
/Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/rich/live.py:231: UserWarning: \n",
+       "install \"ipywidgets\" for Jupyter support\n",
+       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;215;175;0mWARNING \u001B[0m No region provided. Using default region. \u001B]8;id=555642;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/utils/utils.py\u001B\\\u001B[2mutils.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=421982;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001B\\\u001B[2m340\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
                    WARNING  No region provided. Using default region.                                 utils.py:340\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Runs on sagemaker prod, region:us-east-\u001B[1;36m1\u001B[0m \u001B]8;id=547290;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/utils/utils.py\u001B\\\u001B[2mutils.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=357923;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001B\\\u001B[2m354\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
                    INFO     Runs on sagemaker prod, region:us-east-1                                  utils.py:354\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=450725;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=925751;file:///Users/nargokul/workspace/sagemaker-python-sdk/venv10/lib/python3.10/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
                    INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m[01/06/26 11:06:36]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m debug-\u001B[1;36m1\u001B[0m-sklearn-preprocess-job-tags-\u001B[1;36m2026\u001B[0m-01-06-19-04-05-641/algo-\u001B[1;36m1\u001B[0m- \u001B]8;id=572549;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/resources.py\u001B\\\u001B[2mresources.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=97857;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/resources.py#32152\u001B\\\u001B[2m32152\u001B[0m\u001B]8;;\u001B\\\n", + "\u001B[2;36m \u001B[0m \u001B[1;36m1767726309\u001B[0m: \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m Hello from processing script! \u001B[2m \u001B[0m\n" + ], + "text/html": [ + "
[01/06/26 11:06:36] INFO     debug-1-sklearn-preprocess-job-tags-2026-01-06-19-04-05-641/algo-1- resources.py:32152\n",
+       "                             1767726309:                                                                           \n",
+       "                             Hello from processing script!                                                         \n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001B[2;36m[01/06/26 11:07:02]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Final Resource Status: \u001B[1mCompleted\u001B[0m \u001B]8;id=387035;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/resources.py\u001B\\\u001B[2mresources.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=136009;file:///Users/nargokul/workspace/sagemaker-python-sdk/sagemaker-core/src/sagemaker/core/resources.py#32155\u001B\\\u001B[2m32155\u001B[0m\u001B]8;;\u001B\\\n" + ], + "text/html": [ + "
[01/06/26 11:07:02] INFO     Final Resource Status: Completed                                    resources.py:32155\n",
+       "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "92666619336839e0"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
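The notebook's second cell runs sklearn_processor.run(code="test_script.py"), but test_script.py itself is not included in this diff. Judging from the job log line "Hello from processing script!", a minimal placeholder consistent with that output could look like the sketch below (hypothetical; the real script may do actual preprocessing work):

# test_script.py (illustrative stand-in, not part of this diff)
print("Hello from processing script!")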
diff --git a/sagemaker-serve/src/sagemaker/serve/async_inference/__init__.py b/sagemaker-serve/src/sagemaker/serve/async_inference/__init__.py
index a2ab0b6459..6a255ae761 100644
--- a/sagemaker-serve/src/sagemaker/serve/async_inference/__init__.py
+++ b/sagemaker-serve/src/sagemaker/serve/async_inference/__init__.py
@@ -16,4 +16,6 @@
 
 from sagemaker.core.inference_config import AsyncInferenceConfig  # noqa: F401
 from sagemaker.serve.async_inference.waiter_config import WaiterConfig  # noqa: F401
-from sagemaker.serve.async_inference.async_inference_response import AsyncInferenceResponse  # noqa: F401
+from sagemaker.serve.async_inference.async_inference_response import (  # noqa: F401
+    AsyncInferenceResponse,
+)
diff --git a/sagemaker-serve/src/sagemaker/serve/async_inference/async_inference_config.py b/sagemaker-serve/src/sagemaker/serve/async_inference/async_inference_config.py
index 8d0414af29..27ed33e980 100644
--- a/sagemaker-serve/src/sagemaker/serve/async_inference/async_inference_config.py
+++ b/sagemaker-serve/src/sagemaker/serve/async_inference/async_inference_config.py
@@ -33,7 +33,7 @@
     "  from sagemaker.core.inference_config import AsyncInferenceConfig\n"
     "This compatibility shim will be removed in a future version.",
     DeprecationWarning,
-    stacklevel=2
+    stacklevel=2,
 )
 
-__all__ = ['AsyncInferenceConfig']
+__all__ = ["AsyncInferenceConfig"]
diff --git a/sagemaker-serve/src/sagemaker/serve/model_builder.py b/sagemaker-serve/src/sagemaker/serve/model_builder.py
index 2cfaaaca00..4f36b0dddb 100644
--- a/sagemaker-serve/src/sagemaker/serve/model_builder.py
+++ b/sagemaker-serve/src/sagemaker/serve/model_builder.py
@@ -18,7 +18,7 @@
 from __future__ import absolute_import, annotations
 
 import json
-import re 
+import re
 import os
 import copy
 import logging
@@ -30,13 +30,21 @@
 from botocore.exceptions import ClientError
 import packaging.version
 
-from sagemaker.core.resources import Model, Endpoint, TrainingJob, HubContent, InferenceComponent, EndpointConfig
+from sagemaker.core.resources import (
+    Model,
+    Endpoint,
+    TrainingJob,
+    HubContent,
+    InferenceComponent,
+    EndpointConfig,
+)
 from sagemaker.core.shapes import (
     ContainerDefinition,
     ModelMetrics,
     MetadataProperties,
     ModelLifeCycle,
-    DriftCheckBaselines, InferenceComponentComputeResourceRequirements,
+    DriftCheckBaselines,
+    InferenceComponentComputeResourceRequirements,
 )
 from sagemaker.core.resources import (
     ModelPackage,
@@ -46,7 +54,12 @@
 )
 from sagemaker.core.utils.utils import logger
 from sagemaker.core.helper import session_helper
-from sagemaker.core.helper.session_helper import Session, get_execution_role, _wait_until, _deploy_done
+from sagemaker.core.helper.session_helper import (
+    Session,
+    get_execution_role,
+    _wait_until,
+    _deploy_done,
+)
 from sagemaker.core.helper.pipeline_variable import StrPipeVar, PipelineVariable
 
 from sagemaker.train.model_trainer import ModelTrainer
@@ -62,9 +75,7 @@
 from sagemaker.serve.mode.sagemaker_endpoint_mode import SageMakerEndpointMode
 from sagemaker.serve.mode.in_process_mode import InProcessMode
 from sagemaker.serve.utils.types import ModelServer, ModelHub
-from sagemaker.serve.detector.image_detector import (
-    _get_model_base, _detect_framework_and_version
-)
+from sagemaker.serve.detector.image_detector import _get_model_base, _detect_framework_and_version
 from sagemaker.serve.detector.pickler import save_pkl, save_xgboost
 from sagemaker.serve.validations.check_image_uri import is_1p_image_uri
 from sagemaker.core.inference_config import ResourceRequirements
@@ -84,7 +95,9 @@
 from sagemaker.core.jumpstart.artifacts.kwargs import _retrieve_model_deploy_kwargs
 
 from sagemaker.core.inference_config import AsyncInferenceConfig, ServerlessInferenceConfig
-from sagemaker.serve.batch_inference.batch_transform_inference_config import BatchTransformInferenceConfig
+from sagemaker.serve.batch_inference.batch_transform_inference_config import (
+    BatchTransformInferenceConfig,
+)
 
 from sagemaker.core.serializers import (
     NumpySerializer,
@@ -114,8 +127,11 @@
     update_container_with_inference_params,
 )
 from sagemaker.core.config.config_schema import (
-    MODEL_ENABLE_NETWORK_ISOLATION_PATH, MODEL_EXECUTION_ROLE_ARN_PATH,
-    MODEL_VPC_CONFIG_PATH, ENDPOINT_CONFIG_ASYNC_KMS_KEY_ID_PATH, MODEL_CONTAINERS_PATH
+    MODEL_ENABLE_NETWORK_ISOLATION_PATH,
+    MODEL_EXECUTION_ROLE_ARN_PATH,
+    MODEL_VPC_CONFIG_PATH,
+    ENDPOINT_CONFIG_ASYNC_KMS_KEY_ID_PATH,
+    MODEL_CONTAINERS_PATH,
 )
 from sagemaker.serve.constants import SUPPORTED_MODEL_SERVERS, Framework
 from sagemaker.core.workflow.pipeline_context import PipelineSession, runnable_by_pipeline
@@ -138,40 +154,41 @@
 SAGEMAKER_REGION_PARAM_NAME = "sagemaker_region"
 SAGEMAKER_OUTPUT_LOCATION = "sagemaker_s3_output"
 
+
 @dataclass
 class ModelBuilder(_InferenceRecommenderMixin, _ModelBuilderServers, _ModelBuilderUtils):
     """Unified interface for building and deploying machine learning models.
-    
+
     ModelBuilder provides a streamlined workflow for preparing and deploying ML models to
     Amazon SageMaker. It supports multiple frameworks (PyTorch, TensorFlow, HuggingFace, etc.),
     model servers (TorchServe, TGI, Triton, etc.), and deployment modes (SageMaker endpoints,
     local containers, in-process).
-    
+
     The typical workflow involves three steps:
     1. Initialize ModelBuilder with your model and configuration
     2. Call build() to create a deployable Model resource
     3. Call deploy() to create an Endpoint resource for inference
-    
+
     Example:
         >>> from sagemaker.serve.model_builder import ModelBuilder
         >>> from sagemaker.serve.mode.function_pointers import Mode
-        >>> 
+        >>>
         >>> # Initialize with a trained model
         >>> model_builder = ModelBuilder(
         ...     model=my_pytorch_model,
         ...     role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
         ...     instance_type="ml.m5.xlarge"
         ... )
-        >>> 
+        >>>
         >>> # Build the model (creates SageMaker Model resource)
         >>> model = model_builder.build()
-        >>> 
+        >>>
         >>> # Deploy to endpoint (creates SageMaker Endpoint resource)
         >>> endpoint = model_builder.deploy(endpoint_name="my-endpoint")
-        >>> 
+        >>>
         >>> # Make predictions
         >>> result = endpoint.invoke(data=input_data)
-    
+
     Args:
         model: The model to deploy. Can be a trained model object, ModelTrainer, TrainingJob,
             ModelPackage, or JumpStart model ID string. Either model or inference_spec is required.
@@ -195,16 +212,19 @@ class ModelBuilder(_InferenceRecommenderMixin, _ModelBuilderServers, _ModelBuild
         network: Network configuration including VPC settings and network isolation.
         instance_type: EC2 instance type for deployment (e.g., 'ml.m5.large').
         mode: Deployment mode (SAGEMAKER_ENDPOINT, LOCAL_CONTAINER, or IN_PROCESS).
-    
+
     Note:
         ModelBuilder returns sagemaker.core.resources.Model and sagemaker.core.resources.Endpoint
         objects, not the deprecated PySDK Model and Predictor classes. Use endpoint.invoke()
         instead of predictor.predict() for inference.
     """
+
     # ========================================
     # Core Model Definition
     # ========================================
-    model: Optional[Union[object, str, ModelTrainer, BaseTrainer, TrainingJob, ModelPackage, List[Model]]] = field(
+    model: Optional[
+        Union[object, str, ModelTrainer, BaseTrainer, TrainingJob, ModelPackage, List[Model]]
+    ] = field(
         default=None,
         metadata={
             "help": "The model object, JumpStart model ID, or training job from which to extract "
@@ -361,7 +381,6 @@ class ModelBuilder(_InferenceRecommenderMixin, _ModelBuilderServers, _ModelBuild
     _optimizing: bool = field(default=False, init=False)
     _deployment_config: Optional[Dict[str, Any]] = field(default=None, init=False)
 
-
     shared_libs: List[str] = field(
         default_factory=list,
         metadata={"help": "DEPRECATED: Use configure_for_torchserve() instead"},
@@ -379,6 +398,7 @@ def _create_session_with_region(self):
         """Create a SageMaker session with the correct region."""
         if hasattr(self, "region") and self.region:
             import boto3
+
             boto_session = boto3.Session(region_name=self.region)
             return Session(boto_session=boto_session)
         return Session()
@@ -407,21 +427,21 @@ def _warn_about_deprecated_parameters(self, warnings) -> None:
             warnings.warn(
                 "The 'shared_libs' parameter is deprecated. Use configure_for_torchserve() instead.",
                 DeprecationWarning,
-                stacklevel=3
+                stacklevel=3,
             )
 
         if self.dependencies and self.dependencies != {"auto": False}:
             warnings.warn(
                 "The 'dependencies' parameter is deprecated. Use configure_for_torchserve() instead.",
                 DeprecationWarning,
-                stacklevel=3
+                stacklevel=3,
             )
 
         if self.image_config is not None:
             warnings.warn(
                 "The 'image_config' parameter is deprecated. Use configure_for_torchserve() instead.",
                 DeprecationWarning,
-                stacklevel=3
+                stacklevel=3,
             )
 
     def _initialize_compute_config(self) -> None:
@@ -430,9 +450,9 @@ def _initialize_compute_config(self) -> None:
             self.instance_type = self.compute.instance_type
             self.instance_count = self.compute.instance_count or 1
         else:
-            if not hasattr(self, 'instance_type') or self.instance_type is None:
+            if not hasattr(self, "instance_type") or self.instance_type is None:
                 self.instance_type = None
-            if not hasattr(self, 'instance_count') or self.instance_count is None:
+            if not hasattr(self, "instance_count") or self.instance_count is None:
                 self.instance_count = 1
 
         self._user_provided_instance_type = bool(self.compute and self.compute.instance_type)
@@ -446,26 +466,30 @@ def _initialize_network_config(self) -> None:
             if self.network.vpc_config:
                 self.vpc_config = self.network.vpc_config
             else:
-                self.vpc_config = {
-                    'Subnets': self.network.subnets or [],
-                    'SecurityGroupIds': self.network.security_group_ids or []
-                } if (self.network.subnets or self.network.security_group_ids) else None
+                self.vpc_config = (
+                    {
+                        "Subnets": self.network.subnets or [],
+                        "SecurityGroupIds": self.network.security_group_ids or [],
+                    }
+                    if (self.network.subnets or self.network.security_group_ids)
+                    else None
+                )
             self._enable_network_isolation = self.network.enable_network_isolation
         else:
-            if not hasattr(self, 'vpc_config'):
+            if not hasattr(self, "vpc_config"):
                 self.vpc_config = None
-            if not hasattr(self, '_enable_network_isolation'):
+            if not hasattr(self, "_enable_network_isolation"):
                 self._enable_network_isolation = False
 
     def _initialize_defaults(self) -> None:
         """Initialize default values for unset parameters."""
-        if not hasattr(self, 'model_name') or self.model_name is None:
+        if not hasattr(self, "model_name") or self.model_name is None:
             self.model_name = "model-" + str(uuid.uuid4())[:8]
 
-        if not hasattr(self, 'mode') or self.mode is None:
+        if not hasattr(self, "mode") or self.mode is None:
             self.mode = Mode.SAGEMAKER_ENDPOINT
 
-        if not hasattr(self, 'env_vars') or self.env_vars is None:
+        if not hasattr(self, "env_vars") or self.env_vars is None:
             self.env_vars = {}
 
         # Set region with priority: user input > sagemaker session > AWS account region > default
@@ -476,6 +500,7 @@ def _initialize_defaults(self) -> None:
                 # Try to get region from boto3 session (AWS account config)
                 try:
                     import boto3
+
                     self.region = boto3.Session().region_name or None
                 except Exception:
                     self.region = None  # Default fallback
@@ -489,10 +514,10 @@ def _initialize_defaults(self) -> None:
         self.container_config = "host"
         self.inference_recommender_job_results = None
         self.container_log_level = logging.INFO
-        
-        if not hasattr(self, 'framework'):
+
+        if not hasattr(self, "framework"):
             self.framework = None
-        if not hasattr(self, 'framework_version'):
+        if not hasattr(self, "framework_version"):
             self.framework_version = None
 
     def _fetch_default_instance_type_for_custom_model(self) -> str:
@@ -507,9 +532,160 @@ def _fetch_default_instance_type_for_custom_model(self) -> str:
         logger.info(f"Fetching Instance Type from Hosting Configs - {default_instance_type}")
         return default_instance_type
 
+    def _resolve_model_artifact_uri(self) -> Optional[str]:
+        """Resolve the correct model artifact URI based on deployment type.
+
+        This method determines the appropriate S3 URI for model artifacts depending on
+        whether we're deploying a base model, a fine-tuned adapter (LORA), or a fully
+        fine-tuned model.
+
+        Returns:
+            Optional[str]: S3 URI to model artifacts, or None when not needed
+
+        Logic:
+            - For LORA adapters: Returns None (adapter weights are separate)
+            - For fine-tuned models: Returns None (model data is handled by the recipe/container)
+            - For base models: Uses HostingArtifactUri from JumpStart hub metadata
+            - For non-model-customization: Returns None
+
+        Raises:
+            ValueError: If the model package is unavailable when needed. Failures to
+                retrieve hub metadata are logged as warnings and result in None.
+        """
+        # Check if this is a LORA adapter deployment
+        peft_type = self._fetch_peft()
+        if peft_type == "LORA":
+            # LORA adapters don't need artifact_url - they reference base component
+            return None
+
+        # For model customization deployments, check if we have a model package
+        if self._is_model_customization():
+            model_package = self._fetch_model_package()
+            if model_package:
+                if (
+                    hasattr(model_package, "inference_specification")
+                    and model_package.inference_specification
+                    and hasattr(model_package.inference_specification, "containers")
+                    and model_package.inference_specification.containers
+                ):
+
+                    container = model_package.inference_specification.containers[0]
+
+                    # For fine-tuned models (have model_data_source), return None.
+                    # The model data is handled by the recipe/container configuration,
+                    # not via artifact_url in CreateInferenceComponent.
+                    if (
+                        hasattr(container, "model_data_source")
+                        and container.model_data_source
+                        and hasattr(container.model_data_source, "s3_data_source")
+                        and container.model_data_source.s3_data_source
+                    ):
+                        return None
+
+                    # For base models, get HostingArtifactUri from JumpStart
+                    if hasattr(container, "base_model") and container.base_model:
+                        try:
+                            hub_document = self._fetch_hub_document_for_custom_model()
+                            hosting_artifact_uri = hub_document.get("HostingArtifactUri")
+                            if hosting_artifact_uri:
+                                return hosting_artifact_uri
+                            else:
+                                logger.warning(
+                                    "HostingArtifactUri not found in JumpStart hub metadata. "
+                                    "Deployment may fail if artifact URI is required."
+                                )
+                                return None
+                        except Exception as e:
+                            logger.warning(
+                                f"Failed to retrieve HostingArtifactUri from JumpStart metadata: {e}. "
+                                f"Proceeding without artifact URI."
+                            )
+                            return None
+
+        # For non-model-customization deployments, return None
+        return None
+
+    def _infer_instance_type_from_jumpstart(self) -> str:
+        """Infer the appropriate instance type from JumpStart model metadata.
+
+        Queries JumpStart metadata for the base model and selects an appropriate
+        instance type: the hosting config's default instance type when one is
+        specified, otherwise the first entry in the supported instance type list.
+
+        Returns:
+            str: The inferred instance type (e.g., 'ml.g5.12xlarge')
+
+        Raises:
+            ValueError: If instance type cannot be inferred from metadata
+        """
+        try:
+            # Get the hub document which contains hosting configurations
+            hub_document = self._fetch_hub_document_for_custom_model()
+            hosting_configs = hub_document.get("HostingConfigs")
+
+            if not hosting_configs:
+                raise ValueError(
+                    "Unable to infer instance type: Model does not have hosting configuration. "
+                    "Please specify instance_type explicitly."
+                )
+
+            # Get the default hosting config
+            config = next(
+                (cfg for cfg in hosting_configs if cfg.get("Profile") == "Default"),
+                hosting_configs[0],
+            )
+
+            # Extract supported instance types
+            supported_instance_types = config.get("SupportedInstanceTypes", [])
+            default_instance_type = config.get("InstanceType") or config.get("DefaultInstanceType")
+
+            if not supported_instance_types and not default_instance_type:
+                raise ValueError(
+                    "Unable to infer instance type: Model metadata does not specify "
+                    "supported instance types. Please specify instance_type explicitly."
+                )
+
+            # If default instance type is specified, use it
+            if default_instance_type:
+                logger.info(
+                    f"Inferred instance type from JumpStart metadata: {default_instance_type}"
+                )
+                return default_instance_type
+
+            # Fallback to first supported instance type
+            selected_type = supported_instance_types[0]
+            logger.info(f"Inferred instance type from JumpStart metadata: {selected_type}")
+            return selected_type
+
+        except Exception as e:
+            # Provide helpful error message with context
+            error_msg = (
+                f"Unable to infer instance type for model customization deployment: {str(e)}. "
+                "Please specify instance_type explicitly when creating ModelBuilder."
+            )
+
+            # Try to provide available instance types in error message if possible
+            try:
+                hub_document = self._fetch_hub_document_for_custom_model()
+                hosting_configs = hub_document.get("HostingConfigs", [])
+                if hosting_configs:
+                    config = next(
+                        (cfg for cfg in hosting_configs if cfg.get("Profile") == "Default"),
+                        hosting_configs[0],
+                    )
+                    supported_types = config.get("SupportedInstanceTypes", [])
+                    if supported_types:
+                        error_msg += f"\nSupported instance types for this model: {supported_types}"
+            except Exception:
+                pass
+
+            raise ValueError(error_msg)
+
     def _fetch_hub_document_for_custom_model(self) -> dict:
         from sagemaker.core.shapes import BaseModel as CoreBaseModel
-        base_model: CoreBaseModel = self._fetch_model_package().inference_specification.containers[0].base_model
+
+        base_model: CoreBaseModel = (
+            self._fetch_model_package().inference_specification.containers[0].base_model
+        )
         hub_content = HubContent.get(
             hub_content_type="Model",
             hub_name="SageMakerPublicHub",
@@ -528,17 +704,16 @@ def _fetch_hosting_configs_for_custom_model(self) -> dict:
             )
         return hosting_configs
 
-
     def _get_instance_resources(self, instance_type: str) -> tuple:
         """Get CPU and memory for an instance type by querying EC2."""
         try:
-            ec2_client = self.sagemaker_session.boto_session.client('ec2')
-            ec2_instance_type = instance_type.replace('ml.', '')
+            ec2_client = self.sagemaker_session.boto_session.client("ec2")
+            ec2_instance_type = instance_type.replace("ml.", "")
             response = ec2_client.describe_instance_types(InstanceTypes=[ec2_instance_type])
-            if response['InstanceTypes']:
-                instance_info = response['InstanceTypes'][0]
-                cpus = instance_info['VCpuInfo']['DefaultVCpus']
-                memory_mb = instance_info['MemoryInfo']['SizeInMiB']
+            if response["InstanceTypes"]:
+                instance_info = response["InstanceTypes"][0]
+                cpus = instance_info["VCpuInfo"]["DefaultVCpus"]
+                memory_mb = instance_info["MemoryInfo"]["SizeInMiB"]
                 return cpus, memory_mb
         except Exception as e:
             logger.warning(
@@ -547,6 +722,219 @@ def _get_instance_resources(self, instance_type: str) -> tuple:
             )
         return None, None
 
+    def _resolve_compute_requirements(
+        self, instance_type: str, user_resource_requirements: Optional[ResourceRequirements] = None
+    ) -> InferenceComponentComputeResourceRequirements:
+        """Resolve compute requirements by merging JumpStart metadata with user config.
+
+        Retrieves default compute requirements from JumpStart model metadata and merges
+        them with user-provided ResourceRequirements. User-provided values take precedence
+        over defaults. Automatically determines number_of_accelerator_devices_required for
+        GPU instances when not explicitly provided.
+
+        Args:
+            instance_type: The EC2 instance type for deployment (e.g., 'ml.g5.12xlarge')
+            user_resource_requirements: Optional user-provided resource requirements
+
+        Returns:
+            InferenceComponentComputeResourceRequirements with all fields populated
+
+        Raises:
+            ValueError: If requirements are incompatible with instance_type or if
+                       accelerator count cannot be determined for GPU instances
+
+        Requirements: 2.1, 3.1, 3.2, 3.4
+        """
+        # Start with defaults from JumpStart metadata
+        hub_document = self._fetch_hub_document_for_custom_model()
+        hosting_configs = hub_document.get("HostingConfigs", [])
+
+        if not hosting_configs:
+            raise ValueError(
+                "Unable to resolve compute requirements: Model does not have hosting configuration. "
+                "Please provide resource requirements explicitly."
+            )
+
+        # Get the default hosting config
+        config = next(
+            (cfg for cfg in hosting_configs if cfg.get("Profile") == "Default"), hosting_configs[0]
+        )
+
+        return self._resolve_compute_requirements_from_config(
+            instance_type=instance_type,
+            config=config,
+            user_resource_requirements=user_resource_requirements,
+        )
+
+    def _resolve_compute_requirements_from_config(
+        self,
+        instance_type: str,
+        config: dict,
+        user_resource_requirements: Optional[ResourceRequirements] = None,
+    ) -> InferenceComponentComputeResourceRequirements:
+        """Resolve compute requirements from a hosting config dictionary.
+
+        Helper method that extracts compute requirements from an already-fetched
+        hosting config and merges with user-provided requirements.
+
+        Args:
+            instance_type: The EC2 instance type for deployment
+            config: The hosting config dictionary from JumpStart metadata
+            user_resource_requirements: Optional user-provided resource requirements
+
+        Returns:
+            InferenceComponentComputeResourceRequirements with all fields populated
+
+        Raises:
+            ValueError: If requirements are incompatible with instance_type
+        """
+        # Extract default compute requirements from metadata
+        compute_resource_requirements = config.get("ComputeResourceRequirements", {})
+        default_cpus = compute_resource_requirements.get("NumberOfCpuCoresRequired", 1)
+        # Use 1024 MB as safe default for min_memory - metadata values can exceed
+        # SageMaker inference component limits (which are lower than raw EC2 memory)
+        default_memory_mb = 1024
+        default_accelerators = compute_resource_requirements.get(
+            "NumberOfAcceleratorDevicesRequired"
+        )
+
+        # Get actual instance resources for validation
+        actual_cpus, actual_memory_mb = self._get_instance_resources(instance_type)
+
+        # Adjust CPU count if it exceeds instance capacity
+        if actual_cpus and default_cpus > actual_cpus:
+            logger.warning(
+                f"Default requirements request {default_cpus} CPUs but {instance_type} has {actual_cpus}. "
+                f"Adjusting to {actual_cpus}."
+            )
+            default_cpus = actual_cpus
+
+        # Initialize with defaults
+        final_cpus = default_cpus
+        final_min_memory = default_memory_mb
+        final_max_memory = None
+        final_accelerators = default_accelerators
+
+        # Merge with user-provided requirements (user values take precedence)
+        if user_resource_requirements:
+            if user_resource_requirements.num_cpus is not None:
+                final_cpus = user_resource_requirements.num_cpus
+            if user_resource_requirements.min_memory is not None:
+                final_min_memory = user_resource_requirements.min_memory
+            if user_resource_requirements.max_memory is not None:
+                final_max_memory = user_resource_requirements.max_memory
+            if user_resource_requirements.num_accelerators is not None:
+                final_accelerators = user_resource_requirements.num_accelerators
+
+        # Determine accelerator count for GPU instances if not provided
+        # Also strip accelerator count for CPU instances (AWS rejects it)
+        is_gpu_instance = self._is_gpu_instance(instance_type)
+
+        if not is_gpu_instance:
+            # CPU instance - must NOT include accelerator count
+            if final_accelerators is not None:
+                logger.info(
+                    f"Removing accelerator count ({final_accelerators}) for CPU instance type {instance_type}"
+                )
+            final_accelerators = None
+        elif final_accelerators is None:
+            # GPU instance without accelerator count - try to infer
+            accelerator_count = self._infer_accelerator_count_from_instance_type(instance_type)
+            if accelerator_count is not None:
+                final_accelerators = accelerator_count
+                logger.info(
+                    f"Inferred {final_accelerators} accelerator device(s) for instance type {instance_type}"
+                )
+            else:
+                # Cannot determine accelerator count - raise descriptive error
+                raise ValueError(
+                    f"Instance type '{instance_type}' requires accelerator device count specification.\n"
+                    f"Please provide ResourceRequirements with number of accelerators:\n\n"
+                    f"    from sagemaker.core.inference_config import ResourceRequirements\n\n"
+                    f"    resource_requirements = ResourceRequirements(\n"
+                    f"        requests={{\n"
+                    f"            'num_accelerators': ,\n"
+                    f"            'memory': {final_min_memory}\n"
+                    f"        }}\n"
+                    f"    )\n\n"
+                    f"For {instance_type}, check AWS documentation for the number of GPUs available."
+                )
+
+        # Validate requirements are compatible with instance type
+        # Only validate user-provided requirements (defaults are already adjusted above)
+        if user_resource_requirements:
+            if (
+                actual_cpus
+                and user_resource_requirements.num_cpus is not None
+                and user_resource_requirements.num_cpus > actual_cpus
+            ):
+                raise ValueError(
+                    f"Resource requirements incompatible with instance type '{instance_type}'.\n"
+                    f"Requested: {user_resource_requirements.num_cpus} CPUs\n"
+                    f"Available: {actual_cpus} CPUs\n"
+                    f"Please reduce CPU requirements or select a larger instance type."
+                )
+
+            if (
+                actual_memory_mb
+                and user_resource_requirements.min_memory is not None
+                and user_resource_requirements.min_memory > actual_memory_mb
+            ):
+                raise ValueError(
+                    f"Resource requirements incompatible with instance type '{instance_type}'.\n"
+                    f"Requested: {user_resource_requirements.min_memory} MB memory\n"
+                    f"Available: {actual_memory_mb} MB memory\n"
+                    f"Please reduce memory requirements or select a larger instance type."
+                )
+
+        # Create and return InferenceComponentComputeResourceRequirements
+        requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=final_min_memory, number_of_cpu_cores_required=final_cpus
+        )
+
+        if final_max_memory is not None:
+            requirements.max_memory_required_in_mb = final_max_memory
+
+        if final_accelerators is not None:
+            requirements.number_of_accelerator_devices_required = final_accelerators
+
+        return requirements
+
+    def _infer_accelerator_count_from_instance_type(self, instance_type: str) -> Optional[int]:
+        """Infer the number of accelerator devices by querying EC2 instance type info.
+
+        Args:
+            instance_type: The EC2 instance type (e.g., 'ml.g5.12xlarge')
+
+        Returns:
+            Number of accelerator devices, or None if cannot be determined
+        """
+        try:
+            ec2_client = self.sagemaker_session.boto_session.client("ec2")
+            ec2_instance_type = instance_type.replace("ml.", "")
+            response = ec2_client.describe_instance_types(InstanceTypes=[ec2_instance_type])
+            if response["InstanceTypes"]:
+                gpu_info = response["InstanceTypes"][0].get("GpuInfo")
+                if gpu_info and gpu_info.get("Gpus"):
+                    total_gpus = sum(g.get("Count", 0) for g in gpu_info["Gpus"])
+                    if total_gpus > 0:
+                        return total_gpus
+        except Exception as e:
+            logger.warning(f"Could not query GPU count for {instance_type}: {e}.")
+        return None
+
+    def _is_gpu_instance(self, instance_type: str) -> bool:
+        """Check if an instance type has GPUs by querying EC2.
+
+        Args:
+            instance_type: The SageMaker instance type (e.g., 'ml.g5.12xlarge')
+
+        Returns:
+            True if the instance type has GPUs, False otherwise
+        """
+        gpu_count = self._infer_accelerator_count_from_instance_type(instance_type)
+        return gpu_count is not None and gpu_count > 0
+
     def _fetch_and_cache_recipe_config(self):
         """Fetch and cache image URI, compute requirements, and s3_upload_path from recipe during build."""
         hub_document = self._fetch_hub_document_for_custom_model()
@@ -554,7 +942,9 @@ def _fetch_and_cache_recipe_config(self):
         recipe_name = model_package.inference_specification.containers[0].base_model.recipe_name
 
         if not self.s3_upload_path:
-            self.s3_upload_path = model_package.inference_specification.containers[0].model_data_source.s3_data_source.s3_uri
+            self.s3_upload_path = model_package.inference_specification.containers[
+                0
+            ].model_data_source.s3_data_source.s3_uri
 
         for recipe in hub_document.get("RecipeCollection", []):
             if recipe.get("Name") == recipe_name:
@@ -562,28 +952,32 @@ def _fetch_and_cache_recipe_config(self):
                 if hosting_configs:
                     config = next(
                         (cfg for cfg in hosting_configs if cfg.get("Profile") == "Default"),
-                        hosting_configs[0]
+                        hosting_configs[0],
                     )
                     if not self.image_uri:
                         self.image_uri = config.get("EcrAddress")
+
+                    # Infer instance type from JumpStart metadata if not provided
+                    # This is only called for model_customization deployments
                     if not self.instance_type:
-                        self.instance_type = config.get("InstanceType") or config.get("DefaultInstanceType")
-                    
-                    compute_resource_requirements = config.get("ComputeResourceRequirements", {})
-                    requested_cpus = compute_resource_requirements.get("NumberOfCpuCoresRequired", 1)
-                    
-                    # Get actual CPU count from instance type
-                    actual_cpus, _ = self._get_instance_resources(self.instance_type)
-                    if actual_cpus and requested_cpus > actual_cpus:
-                        logger.warning(
-                            f"Recipe requests {requested_cpus} CPUs but {self.instance_type} has {actual_cpus}. "
-                            f"Adjusting to {actual_cpus}."
+                        # Try to get from recipe config first
+                        self.instance_type = config.get("InstanceType") or config.get(
+                            "DefaultInstanceType"
+                        )
+
+                        # If still not available, use the inference method
+                        if not self.instance_type:
+                            self.instance_type = self._infer_instance_type_from_jumpstart()
+
+                    # Resolve compute requirements using the already-fetched hub document
+                    # This ensures requirements are determined through public methods
+                    # and properly merged with any user-provided configuration
+                    self._cached_compute_requirements = (
+                        self._resolve_compute_requirements_from_config(
+                            instance_type=self.instance_type,
+                            config=config,
+                            user_resource_requirements=None,  # No user config at build time
                         )
-                        requested_cpus = actual_cpus
-                    
-                    self._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
-                        min_memory_required_in_mb=1024,
-                        number_of_cpu_cores_required=requested_cpus
                     )
                     return
 
@@ -597,10 +991,9 @@ def _initialize_jumpstart_config(self) -> None:
         """Initialize JumpStart-specific configuration."""
         if hasattr(self, "hub_name") and self.hub_name and not self.hub_arn:
             from sagemaker.core.jumpstart.hub.utils import generate_hub_arn_for_init_kwargs
+
             self.hub_arn = generate_hub_arn_for_init_kwargs(
-                hub_name=self.hub_name,
-                region=self.region,
-                session=self.sagemaker_session
+                hub_name=self.hub_name, region=self.region, session=self.sagemaker_session
             )
         else:
             self.hub_name = None
@@ -608,6 +1001,7 @@ def _initialize_jumpstart_config(self) -> None:
 
         if isinstance(self.model, str) and (not hasattr(self, "model_type") or not self.model_type):
             from sagemaker.core.jumpstart.utils import validate_model_id_and_get_type
+
             try:
                 self.model_type = validate_model_id_and_get_type(
                     model_id=self.model,
@@ -622,6 +1016,7 @@ def _initialize_jumpstart_config(self) -> None:
             # Add tags for the JumpStart model
             from sagemaker.core.jumpstart.utils import add_jumpstart_model_info_tags
             from sagemaker.core.jumpstart.enums import JumpStartScriptScope
+
             self._tags = add_jumpstart_model_info_tags(
                 self._tags,
                 self.model,
@@ -654,15 +1049,16 @@ def _initialize_jumpstart_config(self) -> None:
         if not hasattr(self, "accept_eula"):
             self.accept_eula = None
 
-
     def _initialize_script_mode_variables(self) -> None:
         """Initialize script mode variables from source_code or defaults."""
 
         # Map SourceCode to model.py equivalents
         if self.source_code:
             self.entry_point = self.source_code.entry_script
-            if hasattr(self.source_code, 'requirements'):
-                self.script_dependencies = [self.source_code.requirements] if self.source_code.requirements else []
+            if hasattr(self.source_code, "requirements"):
+                self.script_dependencies = (
+                    [self.source_code.requirements] if self.source_code.requirements else []
+                )
             else:
                 self.script_dependencies = []
                 logger.warning(
@@ -708,16 +1104,16 @@ def _get_client_translators(self) -> tuple:
         elif self.schema_builder:
             deserializer = self.schema_builder.output_deserializer
 
-
         if serializer is None or deserializer is None:
-            auto_serializer, auto_deserializer = self._fetch_serializer_and_deserializer_for_framework(self.framework)
+            auto_serializer, auto_deserializer = (
+                self._fetch_serializer_and_deserializer_for_framework(self.framework)
+            )
 
             if serializer is None:
                 serializer = auto_serializer
             if deserializer is None:
                 deserializer = auto_deserializer
 
-
         if serializer is None:
             raise ValueError("Cannot determine serializer. Try providing a SchemaBuilder.")
         if deserializer is None:
@@ -725,7 +1121,6 @@ def _get_client_translators(self) -> tuple:
 
         return serializer, deserializer
 
-
     def _save_model_inference_spec(self) -> None:
         """Save model or inference specification to the model path."""
 
@@ -743,16 +1138,15 @@ def _save_model_inference_spec(self) -> None:
         elif self.model:
             if isinstance(self.model, str):
                 self.framework = None
-                self.env_vars.update({
-                    "MODEL_CLASS_NAME": self.model
-                })
+                self.env_vars.update({"MODEL_CLASS_NAME": self.model})
             else:
                 fw, _ = _detect_framework_and_version(str(_get_model_base(self.model)))
                 self.framework = self._normalize_framework_to_enum(fw)
-                self.env_vars.update({
-                    "MODEL_CLASS_NAME": f"{self.model.__class__.__module__}.{self.model.__class__.__name__}"
-                })
-
+                self.env_vars.update(
+                    {
+                        "MODEL_CLASS_NAME": f"{self.model.__class__.__module__}.{self.model.__class__.__name__}"
+                    }
+                )
 
             if self.framework == Framework.XGBOOST:
                 save_xgboost(code_path, self.model)
@@ -764,7 +1158,6 @@ def _save_model_inference_spec(self) -> None:
         else:
             raise ValueError("Cannot detect required model or inference spec")
 
-
     def _prepare_for_mode(
         self, model_path: Optional[str] = None, should_upload_artifacts: Optional[bool] = False
     ) -> Optional[tuple]:
@@ -823,19 +1216,24 @@ def _prepare_for_mode(
             f"Supported modes: {Mode.LOCAL_CONTAINER}, {Mode.SAGEMAKER_ENDPOINT}, {Mode.IN_PROCESS}"
         )
 
-
     def _build_validations(self) -> None:
         """Validate ModelBuilder configuration before building."""
         if isinstance(self.model, ModelTrainer) and not self.inference_spec:
             # Check if this is a JumpStart ModelTrainer (which doesn't need InferenceSpec)
-            if not (hasattr(self.model, '_jumpstart_config') and self.model._jumpstart_config is not None):
+            if not (
+                hasattr(self.model, "_jumpstart_config")
+                and self.model._jumpstart_config is not None
+            ):
                 raise ValueError(
                     "InferenceSpec is required when using ModelTrainer, "
                     "unless it's a JumpStart ModelTrainer created with from_jumpstart_config()"
                 )
 
         if isinstance(self.model, ModelTrainer):
-            is_jumpstart = hasattr(self.model, '_jumpstart_config') and self.model._jumpstart_config is not None
+            is_jumpstart = (
+                hasattr(self.model, "_jumpstart_config")
+                and self.model._jumpstart_config is not None
+            )
 
             if not is_jumpstart and not self.image_uri:
                 logger.warning(
@@ -857,13 +1255,23 @@ def _build_validations(self) -> None:
         if self.inference_spec and self.model and not isinstance(self.model, ModelTrainer):
             raise ValueError("Can only set one of the following: model, inference_spec.")
 
-
-        if self.image_uri and is_1p_image_uri(self.image_uri) and not self.model and not self.inference_spec and not getattr(self, '_is_mlflow_model', False):
+        if (
+            self.image_uri
+            and is_1p_image_uri(self.image_uri)
+            and not self.model
+            and not self.inference_spec
+            and not getattr(self, "_is_mlflow_model", False)
+        ):
             self._passthrough = True
             return
 
-
-        if self.image_uri and not is_1p_image_uri(self.image_uri) and not self.model and not self.inference_spec and not getattr(self, '_is_mlflow_model', False):
+        if (
+            self.image_uri
+            and not is_1p_image_uri(self.image_uri)
+            and not self.model
+            and not self.inference_spec
+            and not getattr(self, "_is_mlflow_model", False)
+        ):
             self._passthrough = True
             return
 
@@ -874,7 +1282,6 @@ def _build_validations(self) -> None:
                 f"Supported model servers: {SUPPORTED_MODEL_SERVERS}"
             )
 
-
     def _build_for_passthrough(self) -> Model:
         """Build model for pass-through cases with image-only deployment."""
         if not self.image_uri:
@@ -883,7 +1290,6 @@ def _build_for_passthrough(self) -> Model:
         self.s3_upload_path = None
         return self._create_model()
 
-
     def _build_default_async_inference_config(self, async_inference_config):
         """Build default async inference config and return ``AsyncInferenceConfig``"""
         unique_folder = unique_name_from_base(self.model_name)
@@ -909,7 +1315,6 @@ def _build_default_async_inference_config(self, async_inference_config):
 
         return async_inference_config
 
-
     def enable_network_isolation(self):
         """Whether to enable network isolation when creating this Model
 
@@ -937,34 +1342,66 @@ def _is_model_customization(self) -> bool:
         # Check both model_package_config (new location) and serverless_job_config (legacy)
         if isinstance(self.model, TrainingJob):
             # Check model_package_config first (new location)
-            if (hasattr(self.model, 'model_package_config') and self.model.model_package_config != Unassigned
-                    and getattr(self.model.model_package_config, 'source_model_package_arn', Unassigned) != Unassigned):
+            if (
+                hasattr(self.model, "model_package_config")
+                and self.model.model_package_config != Unassigned
+                and getattr(self.model.model_package_config, "source_model_package_arn", Unassigned)
+                != Unassigned
+            ):
                 return True
             # Fallback to serverless_job_config (legacy location)
-            if (hasattr(self.model, 'serverless_job_config') and self.model.serverless_job_config != Unassigned
-                    and hasattr(self.model, 'output_model_package_arn') and self.model.output_model_package_arn!= Unassigned):
+            if (
+                hasattr(self.model, "serverless_job_config")
+                and self.model.serverless_job_config != Unassigned
+                and hasattr(self.model, "output_model_package_arn")
+                and self.model.output_model_package_arn != Unassigned
+            ):
                 return True
 
         # ModelTrainer with model customization
-        if isinstance(self.model, ModelTrainer) and hasattr(self.model, '_latest_training_job'):
+        if isinstance(self.model, ModelTrainer) and hasattr(self.model, "_latest_training_job"):
             # Check model_package_config first (new location)
-            if (hasattr(self.model._latest_training_job, 'model_package_config') and self.model._latest_training_job.model_package_config != Unassigned()
-                    and getattr(self.model._latest_training_job.model_package_config, 'source_model_package_arn', Unassigned()) != Unassigned()):
+            if (
+                hasattr(self.model._latest_training_job, "model_package_config")
+                and self.model._latest_training_job.model_package_config != Unassigned()
+                and getattr(
+                    self.model._latest_training_job.model_package_config,
+                    "source_model_package_arn",
+                    Unassigned(),
+                )
+                != Unassigned()
+            ):
                 return True
             # Fallback to serverless_job_config (legacy location)
-            if (hasattr(self.model._latest_training_job, 'serverless_job_config') and self.model._latest_training_job.serverless_job_config != Unassigned()
-                    and hasattr(self.model._latest_training_job, 'output_model_package_arn') and self.model._latest_training_job.output_model_package_arn!= Unassigned()):
+            if (
+                hasattr(self.model._latest_training_job, "serverless_job_config")
+                and self.model._latest_training_job.serverless_job_config != Unassigned()
+                and hasattr(self.model._latest_training_job, "output_model_package_arn")
+                and self.model._latest_training_job.output_model_package_arn != Unassigned()
+            ):
                 return True
 
         # BaseTrainer with model customization
-        if isinstance(self.model, BaseTrainer) and hasattr(self.model, '_latest_training_job'):
+        if isinstance(self.model, BaseTrainer) and hasattr(self.model, "_latest_training_job"):
             # Check model_package_config first (new location)
-            if (hasattr(self.model._latest_training_job, 'model_package_config') and self.model._latest_training_job.model_package_config != Unassigned()
-                    and getattr(self.model._latest_training_job.model_package_config, 'source_model_package_arn', Unassigned()) != Unassigned()):
+            if (
+                hasattr(self.model._latest_training_job, "model_package_config")
+                and self.model._latest_training_job.model_package_config != Unassigned()
+                and getattr(
+                    self.model._latest_training_job.model_package_config,
+                    "source_model_package_arn",
+                    Unassigned(),
+                )
+                != Unassigned()
+            ):
                 return True
             # Fallback to serverless_job_config (legacy location)
-            if (hasattr(self.model._latest_training_job, 'serverless_job_config') and self.model._latest_training_job.serverless_job_config != Unassigned()
-                    and hasattr(self.model._latest_training_job, 'output_model_package_arn') and self.model._latest_training_job.output_model_package_arn!= Unassigned()):
+            if (
+                hasattr(self.model._latest_training_job, "serverless_job_config")
+                and self.model._latest_training_job.serverless_job_config != Unassigned()
+                and hasattr(self.model._latest_training_job, "output_model_package_arn")
+                and self.model._latest_training_job.output_model_package_arn != Unassigned()
+            ):
                 return True
 
         return False
@@ -976,51 +1413,74 @@ def _fetch_model_package_arn(self) -> Optional[str]:
             Optional[str]: The model package ARN, or None if not available.
         """
         from sagemaker.core.utils.utils import Unassigned
-        
+
         if isinstance(self.model, ModelPackage):
             return self.model.model_package_arn
         if isinstance(self.model, TrainingJob):
             # Try output_model_package_arn first (preferred)
-            if hasattr(self.model, 'output_model_package_arn'):
+            if hasattr(self.model, "output_model_package_arn"):
                 arn = self.model.output_model_package_arn
                 if not isinstance(arn, Unassigned):
                     return arn
-            
+
             # Fallback to model_package_config.source_model_package_arn
-            if hasattr(self.model, 'model_package_config') and self.model.model_package_config != Unassigned and hasattr(self.model.model_package_config, 'source_model_package_arn'):
+            if (
+                hasattr(self.model, "model_package_config")
+                and self.model.model_package_config != Unassigned
+                and hasattr(self.model.model_package_config, "source_model_package_arn")
+            ):
                 arn = self.model.model_package_config.source_model_package_arn
                 if not isinstance(arn, Unassigned):
                     return arn
-            
+
             # Fallback to serverless_job_config.source_model_package_arn (legacy)
-            if hasattr(self.model, 'serverless_job_config') and self.model.serverless_job_config != Unassigned and hasattr(self.model.serverless_job_config, 'source_model_package_arn'):
+            if (
+                hasattr(self.model, "serverless_job_config")
+                and self.model.serverless_job_config != Unassigned
+                and hasattr(self.model.serverless_job_config, "source_model_package_arn")
+            ):
                 arn = self.model.serverless_job_config.source_model_package_arn
                 if not isinstance(arn, Unassigned):
                     return arn
-            
+
             return None
-        
-        if isinstance(self.model, (ModelTrainer, BaseTrainer)) and hasattr(self.model, '_latest_training_job'):
+
+        if isinstance(self.model, (ModelTrainer, BaseTrainer)) and hasattr(
+            self.model, "_latest_training_job"
+        ):
             # Try output_model_package_arn first (preferred)
-            if hasattr(self.model._latest_training_job, 'output_model_package_arn'):
+            if hasattr(self.model._latest_training_job, "output_model_package_arn"):
                 arn = self.model._latest_training_job.output_model_package_arn
                 if not isinstance(arn, Unassigned):
                     return arn
-            
+
             # Fallback to model_package_config.source_model_package_arn
-            if hasattr(self.model._latest_training_job, 'model_package_config') and self.model._latest_training_job.model_package_config != Unassigned and hasattr(self.model._latest_training_job.model_package_config, 'source_model_package_arn'):
+            if (
+                hasattr(self.model._latest_training_job, "model_package_config")
+                and self.model._latest_training_job.model_package_config != Unassigned
+                and hasattr(
+                    self.model._latest_training_job.model_package_config, "source_model_package_arn"
+                )
+            ):
                 arn = self.model._latest_training_job.model_package_config.source_model_package_arn
                 if not isinstance(arn, Unassigned):
                     return arn
-            
+
             # Fallback to serverless_job_config.source_model_package_arn (legacy)
-            if hasattr(self.model._latest_training_job, 'serverless_job_config') and self.model._latest_training_job.serverless_job_config != Unassigned and hasattr(self.model._latest_training_job.serverless_job_config, 'source_model_package_arn'):
+            if (
+                hasattr(self.model._latest_training_job, "serverless_job_config")
+                and self.model._latest_training_job.serverless_job_config != Unassigned
+                and hasattr(
+                    self.model._latest_training_job.serverless_job_config,
+                    "source_model_package_arn",
+                )
+            ):
                 arn = self.model._latest_training_job.serverless_job_config.source_model_package_arn
                 if not isinstance(arn, Unassigned):
                     return arn
-            
+
             return None
-        
+
         return None
 
     def _fetch_model_package(self) -> Optional[ModelPackage]:
@@ -1031,7 +1491,7 @@ def _fetch_model_package(self) -> Optional[ModelPackage]:
         """
         if isinstance(self.model, ModelPackage):
             return self.model
-        
+
         # Get the ARN and check if it's valid
         arn = self._fetch_model_package_arn()
         if arn:
@@ -1044,7 +1504,7 @@ def _convert_model_data_source_to_local(self, model_data_source):
             return None
 
         result = {}
-        if hasattr(model_data_source, 's3_data_source') and model_data_source.s3_data_source:
+        if hasattr(model_data_source, "s3_data_source") and model_data_source.s3_data_source:
             s3_source = model_data_source.s3_data_source
             result["S3DataSource"] = {
                 "S3Uri": s3_source.s3_uri,
@@ -1053,7 +1513,7 @@ def _convert_model_data_source_to_local(self, model_data_source):
             }
 
             # Handle ModelAccessConfig if present
-            if hasattr(s3_source, 'model_access_config') and s3_source.model_access_config:
+            if hasattr(s3_source, "model_access_config") and s3_source.model_access_config:
                 result["S3DataSource"]["ModelAccessConfig"] = {
                     "AcceptEula": s3_source.model_access_config.accept_eula
                 }
@@ -1071,7 +1531,7 @@ def _convert_additional_sources_to_local(self, additional_sources):
                 "ChannelName": source.channel_name,
             }
 
-            if hasattr(source, 's3_data_source') and source.s3_data_source:
+            if hasattr(source, "s3_data_source") and source.s3_data_source:
                 s3_source = source.s3_data_source
                 source_dict["S3DataSource"] = {
                     "S3Uri": s3_source.s3_uri,
@@ -1080,7 +1540,7 @@ def _convert_additional_sources_to_local(self, additional_sources):
                 }
 
                 # Handle ModelAccessConfig if present
-                if hasattr(s3_source, 'model_access_config') and s3_source.model_access_config:
+                if hasattr(s3_source, "model_access_config") and s3_source.model_access_config:
                     source_dict["S3DataSource"]["ModelAccessConfig"] = {
                         "AcceptEula": s3_source.model_access_config.accept_eula
                     }
@@ -1094,7 +1554,6 @@ def _get_source_code_env_vars(self) -> Dict[str, str]:
         if not self.source_code:
             return {}
 
-
         script_name = self.source_code.entry_script
         dir_name = (
             self.source_code.source_dir
@@ -1109,7 +1568,6 @@ def _get_source_code_env_vars(self) -> Dict[str, str]:
             "SAGEMAKER_REGION": self.region,
         }
 
-
     def to_string(self, obj: object):
         """Convert an object to string
 
@@ -1134,7 +1592,6 @@ def is_repack(self) -> bool:
 
         return self.source_dir and self.entry_point and not (self.key_prefix or self.git_config)
 
-
     def _upload_code(self, key_prefix: str, repack: bool = False) -> None:
         """Uploads code to S3 to be used with script mode with SageMaker inference.
 
@@ -1252,7 +1709,6 @@ def _script_mode_env_vars(self):
             SAGEMAKER_REGION_PARAM_NAME.upper(): self.region,
         }
 
-
     def _is_mms_version(self):
         """Determines if the framework corresponds to an and using MMS.
 
@@ -1269,7 +1725,6 @@ def _is_mms_version(self):
         framework_version = packaging.version.Version(self.framework_version)
         return framework_version >= lowest_mms_version
 
-
     def _get_container_env(self):
         """Placeholder docstring."""
         if not self._container_log_level:
@@ -1283,11 +1738,10 @@ def _get_container_env(self):
         env[self.LOG_LEVEL_PARAM_NAME] = self.LOG_LEVEL_MAP[self._container_log_level]
         return env
 
-
     def _prepare_container_def_base(self):
         """Base container definition logic from your prepare_container_def_base.
-            dict or list[dict]: A container definition object or list of container definitions
-                usable with the CreateModel API.
+        Returns:
+            dict or list[dict]: A container definition object or list of container definitions
+                usable with the CreateModel API.
         """
         # Handle pipeline models with multiple containers
         if isinstance(self.model, list):
@@ -1299,62 +1753,63 @@ def _prepare_container_def_base(self):
             return self._prepare_pipeline_container_defs()
 
         deploy_key_prefix = fw_utils.model_code_key_prefix(
-            getattr(self, 'key_prefix', None),
-            self.model_name,
-            self.image_uri
+            getattr(self, "key_prefix", None), self.model_name, self.image_uri
         )
 
-        deploy_env = copy.deepcopy(getattr(self, 'env_vars', {}))
+        deploy_env = copy.deepcopy(getattr(self, "env_vars", {}))
 
-        if (getattr(self, 'source_dir', None) or
-            getattr(self, 'dependencies', None) or
-            getattr(self, 'entry_point', None) or
-            getattr(self, 'git_config', None)):
+        if (
+            getattr(self, "source_dir", None)
+            or getattr(self, "dependencies", None)
+            or getattr(self, "entry_point", None)
+            or getattr(self, "git_config", None)
+        ):
 
-            self._upload_code(deploy_key_prefix, repack=getattr(self, 'is_repack', lambda: False)())
+            self._upload_code(deploy_key_prefix, repack=getattr(self, "is_repack", lambda: False)())
             deploy_env.update(self._script_mode_env_vars())
 
         # Determine model data URL: prioritize repacked > s3_upload_path > s3_model_data_url
-        model_data_url = (getattr(self, 'repacked_model_data', None) or
-                        getattr(self, 's3_upload_path', None) or
-                        getattr(self, 's3_model_data_url', None))
+        model_data_url = (
+            getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None)
+        )
 
         return container_def(
             self.image_uri,
             model_data_url,
             deploy_env,
-            image_config=getattr(self, 'image_config', None),
-            accept_eula=getattr(self, 'accept_eula', None),
-            additional_model_data_sources=getattr(self, 'additional_model_data_sources', None),
-            model_reference_arn=getattr(self, 'model_reference_arn', None),
+            image_config=getattr(self, "image_config", None),
+            accept_eula=getattr(self, "accept_eula", None),
+            additional_model_data_sources=getattr(self, "additional_model_data_sources", None),
+            model_reference_arn=getattr(self, "model_reference_arn", None),
         )
 
-
     def _handle_tf_repack(self, deploy_key_prefix, instance_type, serverless_inference_config):
         """Handle TensorFlow-specific repack logic."""
         bucket, key_prefix = s3.determine_bucket_and_prefix(
-            bucket=getattr(self, 'bucket', None),
+            bucket=getattr(self, "bucket", None),
             key_prefix=deploy_key_prefix,
             sagemaker_session=self.sagemaker_session,
         )
 
-        if self.entry_point and not is_pipeline_variable(getattr(self, 'model_data', None)):
+        if self.entry_point and not is_pipeline_variable(getattr(self, "model_data", None)):
             model_data = s3.s3_path_join("s3://", bucket, key_prefix, "model.tar.gz")
 
             repack_model(
                 self.entry_point,
-                getattr(self, 'source_dir', None),
-                getattr(self, 'dependencies', None),
-                getattr(self, 'model_data', None),
+                getattr(self, "source_dir", None),
+                getattr(self, "dependencies", None),
+                getattr(self, "model_data", None),
                 model_data,
                 self.sagemaker_session,
-                kms_key=getattr(self, 'model_kms_key', None),
+                kms_key=getattr(self, "model_kms_key", None),
             )
 
             # Update model_data for container_def
             self.model_data = model_data
 
-        elif self.entry_point and is_pipeline_variable(getattr(self, 'model_data', None)):
+        elif self.entry_point and is_pipeline_variable(getattr(self, "model_data", None)):
             # Handle pipeline variable case
             if isinstance(self.sagemaker_session, PipelineSession):
                 self.sagemaker_session.context.need_runtime_repack.add(id(self))
@@ -1370,13 +1825,15 @@ def _handle_tf_repack(self, deploy_key_prefix, instance_type, serverless_inferen
                     "runtime repack may be missing. For more, see: "
                     "https://sagemaker.readthedocs.io/en/stable/"
                     "amazon_sagemaker_model_building_pipeline.html#model-step",
-                    type(getattr(self, 'model_data', None)),
+                    type(getattr(self, "model_data", None)),
                 )
 
-
     def _prepare_container_def(self):
         """Unified container definition preparation for all frameworks."""
-        if self.framework in [Framework.LDA, Framework.NTM, Framework.DJL, Framework.SPARKML] or self.framework is None:
+        if (
+            self.framework in [Framework.LDA, Framework.NTM, Framework.DJL, Framework.SPARKML]
+            or self.framework is None
+        ):
             return self._prepare_container_def_base()
 
         # Framework-specific validations
@@ -1407,68 +1864,84 @@ def _prepare_container_def(self):
 
             # Add framework-specific parameters
             if self.framework in [Framework.PYTORCH, Framework.MXNET, Framework.CHAINER]:
-                image_params["py_version"] = getattr(self, 'py_version', 'py3')
+                image_params["py_version"] = getattr(self, "py_version", "py3")
             elif self.framework == Framework.HUGGINGFACE:
-                image_params["py_version"] = getattr(self, 'py_version', 'py3')
+                image_params["py_version"] = getattr(self, "py_version", "py3")
                 # Use framework_version for both TensorFlow and PyTorch base versions
                 if "tensorflow" in self.framework_version.lower():
                     image_params["base_framework_version"] = f"tensorflow{self.framework_version}"
                 else:
                     image_params["base_framework_version"] = f"pytorch{self.framework_version}"
-                if hasattr(self, 'inference_tool') and self.inference_tool:
+                if hasattr(self, "inference_tool") and self.inference_tool:
                     image_params["inference_tool"] = self.inference_tool
             elif self.framework == Framework.SKLEARN:
-                image_params["py_version"] = getattr(self, 'py_version', 'py3')
+                image_params["py_version"] = getattr(self, "py_version", "py3")
 
             deploy_image = image_uris.retrieve(**image_params)
 
         # Code upload logic
         deploy_key_prefix = model_code_key_prefix(
-            getattr(self, 'key_prefix', None),
-            self.model_name,
-            deploy_image
+            getattr(self, "key_prefix", None), self.model_name, deploy_image
         )
 
         # Framework-specific repack logic
         repack_logic = {
-            Framework.PYTORCH: lambda: getattr(self, '_is_mms_version', lambda: False)(),
-            Framework.MXNET: lambda: getattr(self, '_is_mms_version', lambda: False)(),
+            Framework.PYTORCH: lambda: getattr(self, "_is_mms_version", lambda: False)(),
+            Framework.MXNET: lambda: getattr(self, "_is_mms_version", lambda: False)(),
             Framework.CHAINER: lambda: True,
-            Framework.XGBOOST: lambda: getattr(self, 'enable_network_isolation', lambda: False)(),
-            Framework.SKLEARN: lambda: getattr(self, 'enable_network_isolation', lambda: False)(),
+            Framework.XGBOOST: lambda: getattr(self, "enable_network_isolation", lambda: False)(),
+            Framework.SKLEARN: lambda: getattr(self, "enable_network_isolation", lambda: False)(),
             Framework.HUGGINGFACE: lambda: True,
             Framework.TENSORFLOW: lambda: False,  # TF has special logic
         }
 
         if self.framework == Framework.TENSORFLOW:
             # TensorFlow has special repack logic
-            self._handle_tf_repack(deploy_key_prefix, self.instance_type, self.serverless_inference_config)
+            self._handle_tf_repack(
+                deploy_key_prefix, self.instance_type, self.serverless_inference_config
+            )
         else:
             should_repack = repack_logic.get(self.framework, lambda: False)()
             self._upload_code(deploy_key_prefix, repack=should_repack)
 
         # Environment variables
-        deploy_env = dict(getattr(self, 'env_vars', getattr(self, 'env', {})))
+        deploy_env = dict(getattr(self, "env_vars", getattr(self, "env", {})))
 
         # Add script mode env vars for frameworks that support it
         if self.framework != Framework.TENSORFLOW:  # TF handles this differently
             deploy_env.update(self._script_mode_env_vars())
         elif self.framework == Framework.TENSORFLOW:
-            deploy_env = getattr(self, '_get_container_env', lambda: deploy_env)()
+            deploy_env = getattr(self, "_get_container_env", lambda: deploy_env)()
 
         # Add model server workers if supported
-        if hasattr(self, 'model_server_workers') and self.model_server_workers:
-            deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = to_string(self.model_server_workers)
+        if hasattr(self, "model_server_workers") and self.model_server_workers:
+            deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = to_string(
+                self.model_server_workers
+            )
 
         # Model data resolution
         model_data_resolvers = {
-            Framework.PYTORCH: lambda: getattr(self, 'repacked_model_data', None) or getattr(self, 's3_upload_path', None) or getattr(self, 's3_model_data_url', None),
-            Framework.MXNET: lambda: getattr(self, 'repacked_model_data', None) or getattr(self, 's3_upload_path', None) or getattr(self, 's3_model_data_url', None),
-            Framework.CHAINER: lambda: getattr(self, 'repacked_model_data', None) or getattr(self, 's3_upload_path', None) or getattr(self, 's3_model_data_url', None),
-            Framework.XGBOOST: lambda: getattr(self, 'repacked_model_data', None) or getattr(self, 's3_upload_path', None) or getattr(self, 's3_model_data_url', None),
-            Framework.SKLEARN: lambda: getattr(self, 'repacked_model_data', None) or getattr(self, 's3_upload_path', None) or getattr(self, 's3_model_data_url', None),
-            Framework.HUGGINGFACE: lambda: getattr(self, 'repacked_model_data', None) or getattr(self, 's3_upload_path', None) or getattr(self, 's3_model_data_url', None),
-            Framework.TENSORFLOW: lambda: getattr(self, 'model_data', None),  # TF still has special handling
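+            # Each resolver prefers repacked artifacts, then s3_upload_path, then s3_model_data_url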
+            Framework.PYTORCH: lambda: getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None),
+            Framework.MXNET: lambda: getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None),
+            Framework.CHAINER: lambda: getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None),
+            Framework.XGBOOST: lambda: getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None),
+            Framework.SKLEARN: lambda: getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None),
+            Framework.HUGGINGFACE: lambda: getattr(self, "repacked_model_data", None)
+            or getattr(self, "s3_upload_path", None)
+            or getattr(self, "s3_model_data_url", None),
+            Framework.TENSORFLOW: lambda: getattr(
+                self, "model_data", None
+            ),  # TF still has special handling
         }
 
         model_data = model_data_resolvers[self.framework]()
@@ -1478,14 +1951,14 @@ def _prepare_container_def(self):
             "image_uri": deploy_image,
             "model_data_url": model_data,
             "env": deploy_env,
-            "accept_eula": getattr(self, 'accept_eula', None),
-            "model_reference_arn": getattr(self, 'model_reference_arn', None),
+            "accept_eula": getattr(self, "accept_eula", None),
+            "model_reference_arn": getattr(self, "model_reference_arn", None),
         }
 
         # Add optional parameters if they exist
-        if hasattr(self, 'image_config'):
+        if hasattr(self, "image_config"):
             container_params["image_config"] = self.image_config
-        if hasattr(self, 'additional_model_data_sources'):
+        if hasattr(self, "additional_model_data_sources"):
             container_params["additional_model_data_sources"] = self.additional_model_data_sources
 
         return container_def(**container_params)
@@ -1501,10 +1974,10 @@ def _prepare_pipeline_container_defs(self):
         containers = []
         for core_model in self.model:
             # Check if containers is set and is a list (not Unassigned)
-            if hasattr(core_model, 'containers') and isinstance(core_model.containers, list):
+            if hasattr(core_model, "containers") and isinstance(core_model.containers, list):
                 for c in core_model.containers:
                     containers.append(self._core_container_to_dict(c))
-            elif hasattr(core_model, 'primary_container') and core_model.primary_container:
+            elif hasattr(core_model, "primary_container") and core_model.primary_container:
                 containers.append(self._core_container_to_dict(core_model.primary_container))
         return containers
 
@@ -1521,9 +1994,9 @@ def get_value(obj, attr, default=None):
 
         return container_def(
             container.image,
-            get_value(container, 'model_data_url'),
-            get_value(container, 'environment', {}),
-            image_config=get_value(container, 'image_config'),
+            get_value(container, "model_data_url"),
+            get_value(container, "environment", {}),
+            image_config=get_value(container, "image_config"),
         )
 
     def _create_sagemaker_model(self):
@@ -1534,7 +2007,11 @@ def _create_sagemaker_model(self):
             # _base_name, model_name are not needed under PipelineSession.
             # the model_data may be Pipeline variable
             # which may break the _base_name generation
-            image_uri = container_def["Image"] if isinstance(container_def, dict) else container_def[0]["Image"]
+            image_uri = (
+                container_def["Image"]
+                if isinstance(container_def, dict)
+                else container_def[0]["Image"]
+            )
             self._ensure_base_name_if_needed(
                 image_uri=image_uri,
                 script_uri=self.source_dir,
@@ -1577,7 +2054,6 @@ def _create_sagemaker_model(self):
             return
         return Model.get(model_name=self.model_name, region=self.region)
 
-
     def _create_model(self):
         """Create a SageMaker Model instance from the current configuration."""
         if self._optimizing:
@@ -1590,21 +2066,22 @@ def _create_model(self):
 
         if self.mode == Mode.LOCAL_CONTAINER:
             from sagemaker.core.local.local_session import LocalSession
+
             local_session = LocalSession()
 
             primary_container = self._prepare_container_def()
             local_session.sagemaker_client.create_model(
                 ModelName=self.model_name,
                 PrimaryContainer=primary_container,
-                ExecutionRoleArn=execution_role
+                ExecutionRoleArn=execution_role,
             )
 
             return Model(
                 model_name=self.model_name,
                 primary_container=ContainerDefinition(
-                    image=primary_container['Image'],
-                    model_data_url=primary_container['ModelDataUrl'],
-                    environment=primary_container['Environment']
+                    image=primary_container["Image"],
+                    model_data_url=primary_container["ModelDataUrl"],
+                    environment=primary_container["Environment"],
                 ),
                 execution_role_arn=execution_role,
             )
@@ -1647,7 +2124,9 @@ def _create_model(self):
                 self._tags = add_jumpstart_uri_tags(
                     tags=self._tags,
                     inference_model_uri=(
-                        self.s3_model_data_url if isinstance(self.s3_model_data_url, (str, dict)) else None
+                        self.s3_model_data_url
+                        if isinstance(self.s3_model_data_url, (str, dict))
+                        else None
                     ),
                     inference_script_uri=self.source_dir,
                 )
@@ -1659,7 +2138,10 @@ def _create_model(self):
         else:
             raise ValueError(f"Invalid mode: {self.mode}")
 
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.fetch_endpoint_names_for_base_model")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION,
+        func_name="model_builder.fetch_endpoint_names_for_base_model",
+    )
     def fetch_endpoint_names_for_base_model(self) -> Set[str]:
         """Fetches endpoint names for the base model.
 
@@ -1667,9 +2149,14 @@ def fetch_endpoint_names_for_base_model(self) -> Set[str]:
             Set of endpoint names for the base model.
         """
         from sagemaker.core.resources import Tag as CoreTag
+
         if not self._is_model_customization():
-            raise ValueError("This functionality is only supported for Model Customization use cases")
-        recipe_name = self._fetch_model_package().inference_specification.containers[0].base_model.recipe_name
+            raise ValueError(
+                "This functionality is only supported for Model Customization use cases"
+            )
+        recipe_name = (
+            self._fetch_model_package().inference_specification.containers[0].base_model.recipe_name
+        )
         endpoint_names = set()
         logger.error(f"recipe_name: {recipe_name}")
         for inference_component in InferenceComponent.get_all():
@@ -1711,27 +2198,29 @@ def _build_single_modelbuilder(
         # Handle model customization (fine-tuned models)
         if self._is_model_customization():
             if mode is not None and mode != Mode.SAGEMAKER_ENDPOINT:
-                raise ValueError("Only SageMaker Endpoint Mode is supported for Model Customization use cases")
+                raise ValueError(
+                    "Only SageMaker Endpoint Mode is supported for Model Customization use cases"
+                )
             model_package = self._fetch_model_package()
             # Fetch recipe config first to set image_uri, instance_type, and s3_upload_path
             self._fetch_and_cache_recipe_config()
-            self.s3_upload_path = model_package.inference_specification.containers[0].model_data_source.s3_data_source.s3_uri
+            self.s3_upload_path = model_package.inference_specification.containers[
+                0
+            ].model_data_source.s3_data_source.s3_uri
             container_def = ContainerDefinition(
                 image=self.image_uri,
                 model_data_source={
                     "s3_data_source": {
                         "s3_uri": f"{self.s3_upload_path}/",
                         "s3_data_type": "S3Prefix",
-                        "compression_type": "None"
+                        "compression_type": "None",
                     }
-                }
+                },
             )
             model_name = self.model_name or f"model-{uuid.uuid4().hex[:10]}"
             # Create model
             self.built_model = Model.create(
-                execution_role_arn=self.role_arn,
-                model_name=model_name,
-                containers=[container_def]
+                execution_role_arn=self.role_arn, model_name=model_name, containers=[container_def]
             )
             return self.built_model
 
@@ -1743,7 +2232,10 @@ def _build_single_modelbuilder(
             self.model = None
         elif isinstance(self.model, ModelTrainer):
             # Check if this is a JumpStart ModelTrainer
-            if hasattr(self.model, '_jumpstart_config') and self.model._jumpstart_config is not None:
+            if (
+                hasattr(self.model, "_jumpstart_config")
+                and self.model._jumpstart_config is not None
+            ):
                 # For JumpStart ModelTrainer, extract model_id and route to JumpStart flow
                 jumpstart_config = self.model._jumpstart_config
                 self.model_path = self.model._latest_training_job.model_artifacts.s3_model_artifacts
@@ -1757,7 +2249,9 @@ def _build_single_modelbuilder(
                 self.model_path = self.model._latest_training_job.model_artifacts.s3_model_artifacts
                 self.model = None
 
-        self.sagemaker_session = sagemaker_session or self.sagemaker_session or self._create_session_with_region()
+        self.sagemaker_session = (
+            sagemaker_session or self.sagemaker_session or self._create_session_with_region()
+        )
         self.sagemaker_session.settings._local_download_dir = self.model_path
 
         client = self.sagemaker_session.sagemaker_client
@@ -1767,26 +2261,24 @@ def _build_single_modelbuilder(
 
         self._is_custom_image_uri = self.image_uri is not None
 
-
         self._handle_mlflow_input()
         self._build_validations()
 
-
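+        # Mirror the Hugging Face token so both HF_TOKEN and HUGGING_FACE_HUB_TOKEN are set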
         if self.env_vars.get("HUGGING_FACE_HUB_TOKEN") and not self.env_vars.get("HF_TOKEN"):
             self.env_vars["HF_TOKEN"] = self.env_vars.get("HUGGING_FACE_HUB_TOKEN")
         elif self.env_vars.get("HF_TOKEN") and not self.env_vars.get("HUGGING_FACE_HUB_TOKEN"):
             self.env_vars["HUGGING_FACE_HUB_TOKEN"] = self.env_vars.get("HF_TOKEN")
 
-
-        if getattr(self, '_passthrough', False):
+        if getattr(self, "_passthrough", False):
             self.built_model = self._build_for_passthrough()
             return self.built_model
 
-        if self.model_server and not (isinstance(self.model, str) and self._is_jumpstart_model_id()):
+        if self.model_server and not (
+            isinstance(self.model, str) and self._is_jumpstart_model_id()
+        ):
             self.built_model = self._build_for_model_server()
             return self.built_model
 
-
         if isinstance(self.model, str):
             model_task = None
 
@@ -1801,7 +2293,6 @@ def _build_single_modelbuilder(
                 self.built_model = self._build_for_jumpstart()
                 return self.built_model
 
-
             if self.mode != Mode.IN_PROCESS and self._use_jumpstart_equivalent():
                 self.model_hub = ModelHub.JUMPSTART
                 logger.debug("Building for JumpStart equivalent model ID...")
@@ -1825,11 +2316,9 @@ def _build_single_modelbuilder(
                     if model_task is None:
                         model_task = hf_model_md.get("pipeline_tag")
 
-
                     if self.schema_builder is None and model_task is not None:
                         self._hf_schema_builder_init(model_task)
 
-
                     if model_task == "text-generation":
                         self.built_model = self._build_for_tgi()
                         return self.built_model
@@ -1840,8 +2329,9 @@ def _build_single_modelbuilder(
                         self.built_model = self._build_for_transformers()
                         return self.built_model
 
-            raise ValueError(f"Model {self.model} is not detected as HuggingFace or JumpStart model")
-
+            raise ValueError(
+                f"Model {self.model} is not detected as HuggingFace or JumpStart model"
+            )
 
         if not self.model_server:
             if self.image_uri and is_1p_image_uri(self.image_uri):
@@ -1859,22 +2349,23 @@ def _extract_and_extend_tags_from_model_trainer(self):
             return
 
         # Check if tags attribute exists and is not None
-        if not hasattr(self.model, 'tags') or not self.model.tags:
+        if not hasattr(self.model, "tags") or not self.model.tags:
             return
 
         jumpstart_tags = [
-            tag for tag in self.model.tags
-            if tag.key in ["sagemaker-sdk:jumpstart-model-id", "sagemaker-sdk:jumpstart-model-version"]
+            tag
+            for tag in self.model.tags
+            if tag.key
+            in ["sagemaker-sdk:jumpstart-model-id", "sagemaker-sdk:jumpstart-model-version"]
         ]
 
         self._tags.extend(jumpstart_tags)
 
-
     def _deploy_local_endpoint(self, **kwargs):
         """Deploy the built model to a local endpoint."""
 
         # Extract parameters
-        endpoint_name = kwargs.get("endpoint_name", getattr(self, 'endpoint_name', None))
+        endpoint_name = kwargs.get("endpoint_name", getattr(self, "endpoint_name", None))
         if "endpoint_name" in kwargs:
             self.endpoint_name = endpoint_name
 
@@ -1883,13 +2374,12 @@ def _deploy_local_endpoint(self, **kwargs):
         endpoint_name = endpoint_name or self.endpoint_name
 
         from sagemaker.core.local.local_session import LocalSession
+
         local_session = LocalSession()
         endpoint_exists = False
 
         try:
-            _ = local_session.sagemaker_client.describe_endpoint(
-                EndpointName=endpoint_name
-            )
+            _ = local_session.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
             endpoint_exists = True
         except Exception:
             endpoint_exists = False
@@ -1906,22 +2396,27 @@ def _deploy_local_endpoint(self, **kwargs):
                 local_container_mode_obj=self.modes[str(Mode.LOCAL_CONTAINER)],
                 serializer=self._serializer,
                 deserializer=self._deserializer,
-                container_config=self.container_config
-                )
+                container_config=self.container_config,
+            )
         else:
             if update_endpoint:
-                raise NotImplementedError("Update endpoint is not supported in local mode (V2 parity)")
-            else:
-                return LocalEndpoint.get(
-                    endpoint_name=endpoint_name,
-                    local_session=local_session
+                raise NotImplementedError(
+                    "Update endpoint is not supported in local mode (V2 parity)"
                 )
+            else:
+                return LocalEndpoint.get(endpoint_name=endpoint_name, local_session=local_session)
 
-    def _wait_for_endpoint(self, endpoint, poll=30, live_logging=False, show_progress=True, wait=True):
+    def _wait_for_endpoint(
+        self, endpoint, poll=30, live_logging=False, show_progress=True, wait=True
+    ):
         """Enhanced wait with rich progress bar and status logging"""
         if not wait:
-            logger.info("🚀 Deployment started: Endpoint '%s' using %s in %s mode (deployment in progress)",
-                       endpoint, self.model_server, self.mode)
+            logger.info(
+                "🚀 Deployment started: Endpoint '%s' using %s in %s mode (deployment in progress)",
+                endpoint,
+                self.model_server,
+                self.mode,
+            )
             return
 
         # Use the ModelBuilder's sagemaker_session client (which has correct region)
@@ -1931,21 +2426,31 @@ def _wait_for_endpoint(self, endpoint, poll=30, live_logging=False, show_progres
             from sagemaker.serve.deployment_progress import (
                 EndpointDeploymentProgress,
                 _deploy_done_with_progress,
-                _live_logging_deploy_done_with_progress
+                _live_logging_deploy_done_with_progress,
             )
 
             with EndpointDeploymentProgress(endpoint) as progress:
                 # Check if we have permission for live logging
                 from sagemaker.core.helper.session_helper import _has_permission_for_live_logging
+
                 if _has_permission_for_live_logging(self.sagemaker_session.boto_session, endpoint):
                     # Use live logging with Rich progress tracker
                     cloudwatch_client = self.sagemaker_session.boto_session.client("logs")
                     paginator = cloudwatch_client.get_paginator("filter_log_events")
-                    from sagemaker.core.helper.session_helper import create_paginator_config, EP_LOGGER_POLL
+                    from sagemaker.core.helper.session_helper import (
+                        create_paginator_config,
+                        EP_LOGGER_POLL,
+                    )
+
                     paginator_config = create_paginator_config()
                     desc = _wait_until(
                         lambda: _live_logging_deploy_done_with_progress(
-                            sagemaker_client, endpoint, paginator, paginator_config, EP_LOGGER_POLL, progress
+                            sagemaker_client,
+                            endpoint,
+                            paginator,
+                            paginator_config,
+                            EP_LOGGER_POLL,
+                            progress,
                         ),
                         poll=EP_LOGGER_POLL,
                     )
@@ -1953,7 +2458,7 @@ def _wait_for_endpoint(self, endpoint, poll=30, live_logging=False, show_progres
                     # Fallback to status-only progress
                     desc = _wait_until(
                         lambda: _deploy_done_with_progress(sagemaker_client, endpoint, progress),
-                        poll
+                        poll,
                     )
         else:
             # Existing implementation
@@ -1962,67 +2467,92 @@ def _wait_for_endpoint(self, endpoint, poll=30, live_logging=False, show_progres
         # Check final endpoint status and log accordingly
         try:
             endpoint_desc = sagemaker_client.describe_endpoint(EndpointName=endpoint)
-            endpoint_status = endpoint_desc['EndpointStatus']
-            if endpoint_status == 'InService':
-                endpoint_arn_info = f" (ARN: {endpoint_desc['EndpointArn']})" if self.mode == Mode.SAGEMAKER_ENDPOINT else ""
-                logger.info("✅ Deployment successful: Endpoint '%s' using %s in %s mode%s",
-                           endpoint, self.model_server, self.mode, endpoint_arn_info)
+            endpoint_status = endpoint_desc["EndpointStatus"]
+            if endpoint_status == "InService":
+                endpoint_arn_info = (
+                    f" (ARN: {endpoint_desc['EndpointArn']})"
+                    if self.mode == Mode.SAGEMAKER_ENDPOINT
+                    else ""
+                )
+                logger.info(
+                    "✅ Deployment successful: Endpoint '%s' using %s in %s mode%s",
+                    endpoint,
+                    self.model_server,
+                    self.mode,
+                    endpoint_arn_info,
+                )
             else:
-                logger.error("❌ Deployment failed: Endpoint '%s' status is '%s'", endpoint, endpoint_status)
+                logger.error(
+                    "❌ Deployment failed: Endpoint '%s' status is '%s'", endpoint, endpoint_status
+                )
         except Exception as e:
             logger.error("❌ Deployment failed: Unable to verify endpoint status - %s", str(e))
 
         return desc
 
-
     def _deploy_core_endpoint(self, **kwargs):
         # Extract and update self parameters
-        initial_instance_count = kwargs.get("initial_instance_count", getattr(self, 'instance_count', None))
+        initial_instance_count = kwargs.get(
+            "initial_instance_count", getattr(self, "instance_count", None)
+        )
         if "initial_instance_count" in kwargs:
             self.instance_count = initial_instance_count
 
-        instance_type = kwargs.get("instance_type", getattr(self, 'instance_type', None))
+        instance_type = kwargs.get("instance_type", getattr(self, "instance_type", None))
         if "instance_type" in kwargs:
             self.instance_type = instance_type
 
-        accelerator_type = kwargs.get("accelerator_type", getattr(self, 'accelerator_type', None))
+        accelerator_type = kwargs.get("accelerator_type", getattr(self, "accelerator_type", None))
         if "accelerator_type" in kwargs:
             self.accelerator_type = accelerator_type
 
-        endpoint_name = kwargs.get("endpoint_name", getattr(self, 'endpoint_name', None))
+        endpoint_name = kwargs.get("endpoint_name", getattr(self, "endpoint_name", None))
         if "endpoint_name" in kwargs:
             self.endpoint_name = endpoint_name
 
-        tags = kwargs.get("tags", getattr(self, '_tags', None))
+        tags = kwargs.get("tags", getattr(self, "_tags", None))
         if "tags" in kwargs:
             self._tags = tags
 
-        kms_key = kwargs.get("kms_key", getattr(self, 'kms_key', None))
+        kms_key = kwargs.get("kms_key", getattr(self, "kms_key", None))
         if "kms_key" in kwargs:
             self.kms_key = kms_key
 
-        async_inference_config = kwargs.get("async_inference_config", getattr(self, 'async_inference_config', None))
+        async_inference_config = kwargs.get(
+            "async_inference_config", getattr(self, "async_inference_config", None)
+        )
         if "async_inference_config" in kwargs:
             self.async_inference_config = async_inference_config
 
-        serverless_inference_config = kwargs.get("serverless_inference_config", getattr(self, 'serverless_inference_config', None))
+        serverless_inference_config = kwargs.get(
+            "serverless_inference_config", getattr(self, "serverless_inference_config", None)
+        )
         if "serverless_inference_config" in kwargs:
             self.serverless_inference_config = serverless_inference_config
 
-        model_data_download_timeout = kwargs.get("model_data_download_timeout", getattr(self, 'model_data_download_timeout', None))
+        model_data_download_timeout = kwargs.get(
+            "model_data_download_timeout", getattr(self, "model_data_download_timeout", None)
+        )
         if "model_data_download_timeout" in kwargs:
             self.model_data_download_timeout = model_data_download_timeout
 
-        resources = kwargs.get("resources", getattr(self, 'resource_requirements', None))
+        resources = kwargs.get("resources", getattr(self, "resource_requirements", None))
         if "resources" in kwargs:
             self.resource_requirements = resources
 
-        inference_component_name = kwargs.get("inference_component_name", getattr(self, 'inference_component_name', None))
+        inference_component_name = kwargs.get(
+            "inference_component_name", getattr(self, "inference_component_name", None)
+        )
         if "inference_component_name" in kwargs:
             self.inference_component_name = inference_component_name
 
-        container_startup_health_check_timeout = kwargs.get("container_startup_health_check_timeout", getattr(self, 'container_startup_health_check_timeout', None))
-        inference_ami_version = kwargs.get("inference_ami_version", getattr(self, 'inference_ami_version', None))
+        container_startup_health_check_timeout = kwargs.get(
+            "container_startup_health_check_timeout",
+            getattr(self, "container_startup_health_check_timeout", None),
+        )
+        inference_ami_version = kwargs.get(
+            "inference_ami_version", getattr(self, "inference_ami_version", None)
+        )
 
         serializer = kwargs.get("serializer", None)
         deserializer = kwargs.get("deserializer", None)
@@ -2052,8 +2582,10 @@ def _deploy_core_endpoint(self, **kwargs):
 
         routing_config = _resolve_routing_config(routing_config)
 
-
-        if (inference_recommendation_id is not None or self.inference_recommender_job_results is not None):
+        if (
+            inference_recommendation_id is not None
+            or self.inference_recommender_job_results is not None
+        ):
             instance_type, initial_instance_count = self._update_params(
                 instance_type=instance_type,
                 initial_instance_count=initial_instance_count,
@@ -2065,7 +2597,6 @@ def _deploy_core_endpoint(self, **kwargs):
                 inference_recommender_job_results=self.inference_recommender_job_results,
             )
 
-
         is_async = async_inference_config is not None
         if is_async and not isinstance(async_inference_config, AsyncInferenceConfig):
             raise ValueError("async_inference_config needs to be a AsyncInferenceConfig object")
@@ -2076,11 +2607,14 @@ def _deploy_core_endpoint(self, **kwargs):
 
         is_serverless = serverless_inference_config is not None
         if not is_serverless and not (instance_type and initial_instance_count):
-            raise ValueError("Must specify instance type and instance count unless using serverless inference")
+            raise ValueError(
+                "Must specify instance type and instance count unless using serverless inference"
+            )
 
         if is_serverless and not isinstance(serverless_inference_config, ServerlessInferenceConfig):
-            raise ValueError("serverless_inference_config needs to be a ServerlessInferenceConfig object")
-
+            raise ValueError(
+                "serverless_inference_config needs to be a ServerlessInferenceConfig object"
+            )
 
         if self._is_sharded_model:
             if endpoint_type != EndpointType.INFERENCE_COMPONENT_BASED:
@@ -2161,7 +2695,6 @@ def _deploy_core_endpoint(self, **kwargs):
                     "Fast Model Loading. Configure by setting `num_cpus` to 0 in `resources`."
                 )
 
-
         if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
             if update_endpoint:
                 raise ValueError(
@@ -2188,7 +2721,6 @@ def _deploy_core_endpoint(self, **kwargs):
                 else:
                     managed_instance_scaling_config["MinInstanceCount"] = initial_instance_count
 
-
             if not self.sagemaker_session.endpoint_in_service_or_not(self.endpoint_name):
                 production_variant = session_helper.production_variant(
                     instance_type=instance_type,
@@ -2214,11 +2746,10 @@ def _deploy_core_endpoint(self, **kwargs):
                 )
                 self._wait_for_endpoint(endpoint=self.endpoint_name, show_progress=True, wait=wait)
 
-
             core_endpoint = Endpoint.get(
                 endpoint_name=self.endpoint_name,
                 session=self.sagemaker_session.boto_session,
-                region=self.region
+                region=self.region,
             )
 
             # [TODO]: Refactor to a module
@@ -2343,20 +2874,19 @@ def _deploy_core_endpoint(self, **kwargs):
             core_endpoint = Endpoint.get(
                 endpoint_name=self.endpoint_name,
                 session=self.sagemaker_session.boto_session,
-                region=self.region
+                region=self.region,
             )
 
             return core_endpoint
 
-
     def _deploy(self, **kwargs):
-        self.accept_eula = kwargs.get("accept_eula", getattr(self, 'accept_eula', False))
-        self.built_model = kwargs.get("built_model", getattr(self, 'built_model', None))
+        self.accept_eula = kwargs.get("accept_eula", getattr(self, "accept_eula", False))
+        self.built_model = kwargs.get("built_model", getattr(self, "built_model", None))
 
-        if not hasattr(self, 'built_model') or self.built_model is None:
+        if not hasattr(self, "built_model") or self.built_model is None:
             raise ValueError("Must call build() before deploy()")
 
-        if hasattr(self, 'model_server') and self.model_server:
+        if hasattr(self, "model_server") and self.model_server:
             wrapper_method = self._get_deploy_wrapper()
             if wrapper_method:
                 endpoint = wrapper_method(**kwargs)
@@ -2377,14 +2907,13 @@ def _deploy(self, **kwargs):
                 in_process_mode_obj=self.modes[str(Mode.IN_PROCESS)],
                 serializer=self._serializer,
                 deserializer=self._deserializer,
-                container_config=self.container_config
-                )
+                container_config=self.container_config,
+            )
         else:
             raise ValueError(f"Deployment mode {self.mode} not supported")
 
         return endpoint
 
-
     def _get_deploy_wrapper(self):
         """Get the appropriate deploy wrapper method for the current model server."""
         if isinstance(self.model, str) and self._is_jumpstart_model_id():
@@ -2409,14 +2938,19 @@ def _does_ic_exist(self, ic_name: str) -> bool:
         except ClientError as e:
             return "Could not find inference component" not in e.response["Error"]["Message"]
 
-
-    def _update_inference_component(self, ic_name: str, resource_requirements: ResourceRequirements, **kwargs):
+    def _update_inference_component(
+        self, ic_name: str, resource_requirements: ResourceRequirements, **kwargs
+    ):
         """Update existing inference component."""
         startup_parameters = {}
         if kwargs.get("model_data_download_timeout"):
-            startup_parameters["ModelDataDownloadTimeoutInSeconds"] = kwargs["model_data_download_timeout"]
+            startup_parameters["ModelDataDownloadTimeoutInSeconds"] = kwargs[
+                "model_data_download_timeout"
+            ]
         if kwargs.get("container_timeout_in_seconds"):
-            startup_parameters["ContainerStartupHealthCheckTimeoutInSeconds"] = kwargs["container_timeout_in_seconds"]
+            startup_parameters["ContainerStartupHealthCheckTimeoutInSeconds"] = kwargs[
+                "container_timeout_in_seconds"
+            ]
 
         compute_rr = resource_requirements.get_compute_resource_requirements()
         inference_component_spec = {
@@ -2429,15 +2963,10 @@ def _update_inference_component(self, ic_name: str, resource_requirements: Resou
         return self.sagemaker_session.update_inference_component(
             inference_component_name=ic_name,
             specification=inference_component_spec,
-            runtime_config=runtime_config
+            runtime_config=runtime_config,
         )
 
-    def _deploy_for_ic(
-        self,
-        ic_data: Dict[str, Any],
-        endpoint_name: str,
-        **kwargs
-    ) -> Endpoint:
+    def _deploy_for_ic(self, ic_data: Dict[str, Any], endpoint_name: str, **kwargs) -> Endpoint:
         """Deploy/update inference component and return V3 Endpoint."""
         ic_name = ic_data.get("Name")
         resource_requirements = ic_data.get("ResourceRequirements")
@@ -2450,7 +2979,7 @@ def _deploy_for_ic(
             return Endpoint.get(
                 endpoint_name=endpoint_name,
                 session=self.sagemaker_session.boto_session,
-                region=self.region
+                region=self.region,
             )
         else:
             # Create new IC via _deploy()
@@ -2460,9 +2989,9 @@ def _deploy_for_ic(
                 endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
                 resources=resource_requirements,
                 inference_component_name=ic_name,
-                instance_type=kwargs.get('instance_type', self.instance_type),
-                initial_instance_count=kwargs.get('initial_instance_count', 1),
-                **kwargs
+                instance_type=kwargs.get("instance_type", self.instance_type),
+                initial_instance_count=kwargs.get("initial_instance_count", 1),
+                **kwargs,
             )
 
     def _reset_build_state(self):
@@ -2472,39 +3001,44 @@ def _reset_build_state(self):
         self.secret_key = ""
 
         # JumpStart preparation flags
-        for attr in ['prepared_for_djl', 'prepared_for_tgi', 'prepared_for_mms']:
+        for attr in ["prepared_for_djl", "prepared_for_tgi", "prepared_for_mms"]:
             if hasattr(self, attr):
                 delattr(self, attr)
 
         # JumpStart cached data
-        for attr in ['js_model_config', 'existing_properties', '_cached_js_model_specs', '_cached_is_jumpstart']:
+        for attr in [
+            "js_model_config",
+            "existing_properties",
+            "_cached_js_model_specs",
+            "_cached_is_jumpstart",
+        ]:
             if hasattr(self, attr):
                 delattr(self, attr)
 
         # HuggingFace cached data
-        if hasattr(self, 'hf_model_config'):
-            delattr(self, 'hf_model_config')
+        if hasattr(self, "hf_model_config"):
+            delattr(self, "hf_model_config")
 
         # Mode and serving state
-        if hasattr(self, 'modes'):
-            delattr(self, 'modes')
-        if hasattr(self, 'serve_settings'):
-            delattr(self, 'serve_settings')
+        if hasattr(self, "modes"):
+            delattr(self, "modes")
+        if hasattr(self, "serve_settings"):
+            delattr(self, "serve_settings")
 
         # Serialization state
-        for attr in ['_serializer', '_deserializer']:
+        for attr in ["_serializer", "_deserializer"]:
             if hasattr(self, attr):
                 delattr(self, attr)
 
         # Upload/packaging state
         self.s3_model_data_url = None
         self.s3_upload_path = None
-        for attr in ['uploaded_code', 'repacked_model_data']:
+        for attr in ["uploaded_code", "repacked_model_data"]:
             if hasattr(self, attr):
                 delattr(self, attr)
 
         # Image and passthrough flags
-        for attr in ['_is_custom_image_uri', '_passthrough']:
+        for attr in ["_is_custom_image_uri", "_passthrough"]:
             if hasattr(self, attr):
                 delattr(self, attr)
 
@@ -2552,7 +3086,7 @@ def build(
             >>> endpoint = model_builder.deploy()  # Creates Endpoint resource
             >>> result = endpoint.invoke(data=input_data)
         """
-        if hasattr(self, 'built_model') and self.built_model is not None:
+        if hasattr(self, "built_model") and self.built_model is not None:
             logger.warning(
                 "ModelBuilder.build() has already been called. "
                 "Reusing ModelBuilder objects is not recommended and may cause issues. "
@@ -2574,21 +3108,25 @@ def build(
             logger.debug("Updating role_arn during build()")
             self.role_arn = role_arn
 
-        self.model_name = model_name or getattr(self, 'model_name', None)
-        self.mode = mode or getattr(self, 'mode', None)
-        self.instance_type = getattr(self, 'instance_type', None)
-        self.s3_model_data_url = getattr(self, 's3_model_data_url', None)
-        self.sagemaker_session = sagemaker_session or getattr(self, 'sagemaker_session', None) or self._create_session_with_region()
-        self.framework = getattr(self, 'framework', None)
-        self.framework_version = getattr(self, 'framework_version', None)
-        self.git_config = getattr(self, 'git_config', None)
-        self.model_kms_key = getattr(self, 'model_kms_key', None)
-        self.model_server_workers = getattr(self, 'model_server_workers', None)
-        self.serverless_inference_config = getattr(self, 'serverless_inference_config', None)
-        self.accelerator_type = getattr(self, 'accelerator_type', None)
-        self.model_reference_arn = getattr(self, 'model_reference_arn', None)
-        self.accept_eula = getattr(self, 'accept_eula', None)
-        self.container_log_level = getattr(self, 'container_log_level', None)
+        self.model_name = model_name or getattr(self, "model_name", None)
+        self.mode = mode or getattr(self, "mode", None)
+        self.instance_type = getattr(self, "instance_type", None)
+        self.s3_model_data_url = getattr(self, "s3_model_data_url", None)
+        self.sagemaker_session = (
+            sagemaker_session
+            or getattr(self, "sagemaker_session", None)
+            or self._create_session_with_region()
+        )
+        self.framework = getattr(self, "framework", None)
+        self.framework_version = getattr(self, "framework_version", None)
+        self.git_config = getattr(self, "git_config", None)
+        self.model_kms_key = getattr(self, "model_kms_key", None)
+        self.model_server_workers = getattr(self, "model_server_workers", None)
+        self.serverless_inference_config = getattr(self, "serverless_inference_config", None)
+        self.accelerator_type = getattr(self, "accelerator_type", None)
+        self.model_reference_arn = getattr(self, "model_reference_arn", None)
+        self.accept_eula = getattr(self, "accept_eula", None)
+        self.container_log_level = getattr(self, "container_log_level", None)
 
         deployables = {}
 
@@ -2601,16 +3139,24 @@ def build(
                 role_arn=self.role_arn,
                 sagemaker_session=self.sagemaker_session,
             )
-            model_arn_info = f" (ARN: {self.built_model.model_arn})" if self.mode == Mode.SAGEMAKER_ENDPOINT and hasattr(self.built_model, 'model_arn') else ""
-            logger.info("✅ Model has been created: '%s' using server %s in %s mode%s", self.model_name, self.model_server, self.mode, model_arn_info)
+            model_arn_info = (
+                f" (ARN: {self.built_model.model_arn})"
+                if self.mode == Mode.SAGEMAKER_ENDPOINT and hasattr(self.built_model, "model_arn")
+                else ""
+            )
+            logger.info(
+                "✅ Model has been created: '%s' using server %s in %s mode%s",
+                self.model_name,
+                self.model_server,
+                self.mode,
+                model_arn_info,
+            )
             return model
 
-
         built_ic_models = []
         if self.modelbuilder_list:
             logger.debug("Detected ModelBuilders in modelbuilder_list.")
 
-
             for mb in self.modelbuilder_list:
                 if mb.mode == Mode.IN_PROCESS or mb.mode == Mode.LOCAL_CONTAINER:
                     raise ValueError(
@@ -2628,14 +3174,12 @@ def build(
                         + "and custom orchestrators."
                     )
 
-
             for mb in self.modelbuilder_list:
 
                 mb.serve_settings = mb._get_serve_setting()
 
                 logger.debug("Building ModelBuilder %s.", mb.model_name)
 
-
                 mb = mb._get_inference_component_resource_requirements(mb=mb)
 
                 built_model = mb._build_single_modelbuilder(
@@ -2648,11 +3192,20 @@ def build(
                         "Model": built_model,
                     }
                 )
-                model_arn_info = f" (ARN: {mb.built_model.model_arn})" if mb.mode == Mode.SAGEMAKER_ENDPOINT and hasattr(mb.built_model, 'model_arn') else ""
-                logger.info("✅ Model build successful: '%s' using server %s in %s mode%s", mb.model_name, mb.model_server, mb.mode, model_arn_info)
+                model_arn_info = (
+                    f" (ARN: {mb.built_model.model_arn})"
+                    if mb.mode == Mode.SAGEMAKER_ENDPOINT and hasattr(mb.built_model, "model_arn")
+                    else ""
+                )
+                logger.info(
+                    "✅ Model build successful: '%s' using server %s in %s mode%s",
+                    mb.model_name,
+                    mb.model_server,
+                    mb.mode,
+                    model_arn_info,
+                )
             deployables["InferenceComponents"] = built_ic_models
 
-
         if isinstance(self.inference_spec, (CustomOrchestrator, AsyncCustomOrchestrator)):
             logger.debug("Building custom orchestrator.")
             if self.mode == Mode.IN_PROCESS or self.mode == Mode.LOCAL_CONTAINER:
@@ -2702,12 +3255,19 @@ def build(
                     "Model": built_orchestrator,
                 }
 
-            logger.info("✅ Custom orchestrator build successful: '%s' using server %s in %s mode", self.model_name, self.model_server, self.mode)
+            logger.info(
+                "✅ Custom orchestrator build successful: '%s' using server %s in %s mode",
+                self.model_name,
+                self.model_server,
+                self.mode,
+            )
 
         self._deployables = deployables
         return self
 
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.configure_for_torchserve")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.configure_for_torchserve"
+    )
     def configure_for_torchserve(
         self,
         shared_libs: Optional[List[str]] = None,
@@ -2725,9 +3285,10 @@ def configure_for_torchserve(
         self.model_server = ModelServer.TORCHSERVE
         return self
 
-
     @classmethod
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.from_jumpstart_config")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.from_jumpstart_config"
+    )
     def from_jumpstart_config(
         cls,
         jumpstart_config: JumpStartConfig,
@@ -2840,17 +3401,18 @@ def from_jumpstart_config(
         mb_instance.resource_requirements = resource_requirements
         mb_instance.model_kms_key = model_kms_key
         mb_instance.hub_name = jumpstart_config.hub_name
-        mb_instance.config_name=jumpstart_config.inference_config_name
+        mb_instance.config_name = jumpstart_config.inference_config_name
         mb_instance.accept_eula = jumpstart_config.accept_eula
-        mb_instance.tolerate_vulnerable_model=tolerate_vulnerable_model
-        mb_instance.tolerate_deprecated_model=tolerate_deprecated_model
-        mb_instance.model_data_download_timeout=deploy_kwargs.get("model_data_download_timeout")
-        mb_instance.container_startup_health_check_timeout=deploy_kwargs.get("container_startup_health_check_timeout")
-        mb_instance.inference_ami_version=deploy_kwargs.get("inference_ami_version")
+        mb_instance.tolerate_vulnerable_model = tolerate_vulnerable_model
+        mb_instance.tolerate_deprecated_model = tolerate_deprecated_model
+        mb_instance.model_data_download_timeout = deploy_kwargs.get("model_data_download_timeout")
+        mb_instance.container_startup_health_check_timeout = deploy_kwargs.get(
+            "container_startup_health_check_timeout"
+        )
+        mb_instance.inference_ami_version = deploy_kwargs.get("inference_ami_version")
 
         return mb_instance
 
-
     @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.transformer")
     def transformer(
         self,
@@ -2901,7 +3463,7 @@ def transformer(
         tags = format_tags(tags)
 
         # Ensure model has been built
-        if not hasattr(self, 'built_model') or self.built_model is None:
+        if not hasattr(self, "built_model") or self.built_model is None:
             raise ValueError("Must call build() before creating transformer")
 
         # Network isolation disables custom environment variables
@@ -2926,8 +3488,9 @@ def transformer(
             sagemaker_session=self.sagemaker_session,
         )
 
-
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.display_benchmark_metrics")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.display_benchmark_metrics"
+    )
     def display_benchmark_metrics(self, **kwargs) -> None:
         """Display benchmark metrics for JumpStart models."""
         if not isinstance(self.model, str):
@@ -2943,33 +3506,31 @@ def display_benchmark_metrics(self, **kwargs) -> None:
         else:
             raise ValueError("This model does not have benchmark metrics available")
 
-
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.set_deployment_config")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.set_deployment_config"
+    )
     def set_deployment_config(self, config_name: str, instance_type: str) -> None:
         """Sets the deployment config to apply to the model."""
         if not isinstance(self.model, str):
-            raise ValueError("Deployment config is only supported for JumpStart or HuggingFace models")
+            raise ValueError(
+                "Deployment config is only supported for JumpStart or HuggingFace models"
+            )
 
         if not (self._is_jumpstart_model_id() or self._use_jumpstart_equivalent()):
             raise ValueError(f"The deployment config {config_name} cannot be set on this model")
 
-
         self.config_name = config_name
         self.instance_type = instance_type
 
-
         self._deployment_config = None
 
-
         self._deployment_config = self.get_deployment_config()
 
-
         if self._deployment_config:
             deployment_args = self._deployment_config.get("DeploymentArgs", {})
             if deployment_args.get("AdditionalDataSources"):
                 self.additional_model_data_sources = deployment_args["AdditionalDataSources"]
 
-
         if self.additional_model_data_sources:
             self.speculative_decoding_draft_model_source = "sagemaker"
             self.add_tags({"Key": Tag.SPECULATIVE_DRAFT_MODEL_PROVIDER, "Value": "sagemaker"})
@@ -2977,21 +3538,22 @@ def set_deployment_config(self, config_name: str, instance_type: str) -> None:
             self.remove_tag_with_key(Tag.FINE_TUNING_MODEL_PATH)
             self.remove_tag_with_key(Tag.FINE_TUNING_JOB_NAME)
 
-
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.get_deployment_config")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.get_deployment_config"
+    )
     def get_deployment_config(self) -> Optional[Dict[str, Any]]:
         """Gets the deployment config to apply to the model."""
         if not isinstance(self.model, str):
-            raise ValueError("Deployment config is only supported for JumpStart or HuggingFace models")
+            raise ValueError(
+                "Deployment config is only supported for JumpStart or HuggingFace models"
+            )
 
         if not (self._is_jumpstart_model_id() or self._use_jumpstart_equivalent()):
             raise ValueError("This model does not have any deployment config yet")
 
-
         if self.config_name is None:
             return None
 
-
         if self._deployment_config is None:
 
             for config in self.list_deployment_configs():
@@ -3001,23 +3563,23 @@ def get_deployment_config(self) -> Optional[Dict[str, Any]]:
 
         return self._deployment_config
 
-
-    @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.list_deployment_configs")
+    @_telemetry_emitter(
+        feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.list_deployment_configs"
+    )
     def list_deployment_configs(self) -> List[Dict[str, Any]]:
         """List deployment configs for the model in the current region."""
         if not isinstance(self.model, str):
-            raise ValueError("Deployment config is only supported for JumpStart or HuggingFace models")
+            raise ValueError(
+                "Deployment config is only supported for JumpStart or HuggingFace models"
+            )
 
         if not (self._is_jumpstart_model_id() or self._use_jumpstart_equivalent()):
             raise ValueError("Deployment config is only supported for JumpStart models")
 
-
         return self.deployment_config_response_data(
             self._get_deployment_configs(self.config_name, self.instance_type)
         )  # Delegate to JumpStart builder
 
-
-
     @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.optimize")
     # Add these methods to the current V3 ModelBuilder class:
     def optimize(
@@ -3107,7 +3669,9 @@ def optimize(
 
         # Update parameters if provided
         if region and region != self.region:
-            logger.warning("Changing region from '%s' to '%s' during optimize()", self.region, region)
+            logger.warning(
+                "Changing region from '%s' to '%s' during optimize()", self.region, region
+            )
             self.region = region
             self.sagemaker_session = self._create_session_with_region()
 
@@ -3119,12 +3683,12 @@ def optimize(
         if sagemaker_session:
             self.sagemaker_session = sagemaker_session
 
-        self.model_name = model_name or getattr(self, 'model_name', None)
-        self.framework = getattr(self, 'framework', None)
-        self.framework_version = getattr(self, 'framework_version', None)
-        self.accept_eula = accept_eula or getattr(self, 'accept_eula', None)
-        self.instance_type = instance_type or getattr(self, 'instance_type', None)
-        self.container_log_level = getattr(self, 'container_log_level', None)
+        self.model_name = model_name or getattr(self, "model_name", None)
+        self.framework = getattr(self, "framework", None)
+        self.framework_version = getattr(self, "framework_version", None)
+        self.accept_eula = accept_eula or getattr(self, "accept_eula", None)
+        self.instance_type = instance_type or getattr(self, "instance_type", None)
+        self.container_log_level = getattr(self, "container_log_level", None)
         self.serve_settings = self._get_serve_setting()
 
         self._optimizing = True
@@ -3148,7 +3712,6 @@ def optimize(
             sagemaker_session=sagemaker_session,
         )
 
-
     def _model_builder_optimize_wrapper(
         self,
         output_path: Optional[str] = None,
@@ -3191,8 +3754,8 @@ def _model_builder_optimize_wrapper(
 
         self.is_compiled = compilation_config is not None
         self.is_quantized = quantization_config is not None
-        self.speculative_decoding_draft_model_source = self._extract_speculative_draft_model_provider(
-            speculative_decoding_config
+        self.speculative_decoding_draft_model_source = (
+            self._extract_speculative_draft_model_provider(speculative_decoding_config)
         )
 
         if self.mode != Mode.SAGEMAKER_ENDPOINT:
@@ -3230,7 +3793,9 @@ def _model_builder_optimize_wrapper(
 
         # Validate and set region
         if region and region != self.region:
-            logger.warning("Changing region from '%s' to '%s' during optimize()", self.region, region)
+            logger.warning(
+                "Changing region from '%s' to '%s' during optimize()", self.region, region
+            )
             self.region = region
             # Recreate session with new region
             self.sagemaker_session = self._create_session_with_region()
@@ -3240,7 +3805,9 @@ def _model_builder_optimize_wrapper(
             logger.debug("Updating role_arn during optimize()")
             self.role_arn = role_arn
 
-        self.sagemaker_session = sagemaker_session or self.sagemaker_session or self._create_session_with_region()
+        self.sagemaker_session = (
+            sagemaker_session or self.sagemaker_session or self._create_session_with_region()
+        )
         self.instance_type = instance_type or self.instance_type
 
         job_name = job_name or f"modelbuilderjob-{uuid.uuid4().hex}"
@@ -3248,8 +3815,7 @@ def _model_builder_optimize_wrapper(
         if self._is_jumpstart_model_id():
             # Build using V3 method instead of self.build()
             self.built_model = self._build_single_modelbuilder(
-                mode=self.mode,
-                sagemaker_session=self.sagemaker_session
+                mode=self.mode, sagemaker_session=self.sagemaker_session
             )
             # Set deployment config on built_model if needed
             input_args = self._optimize_for_jumpstart(
@@ -3275,8 +3841,7 @@ def _model_builder_optimize_wrapper(
 
             # Build using V3 method instead of self.build()
             self.built_model = self._build_single_modelbuilder(
-                mode=self.mode,
-                sagemaker_session=self.sagemaker_session
+                mode=self.mode, sagemaker_session=self.sagemaker_session
             )
             input_args = self._optimize_for_hf(
                 output_path=output_path,
@@ -3298,14 +3863,12 @@ def _model_builder_optimize_wrapper(
         if input_args:
             optimization_instance_type = input_args["DeploymentInstanceType"]
 
-
             gpu_instance_families = ["g5", "g6", "p4d", "p4de", "p5"]
             is_gpu_instance = optimization_instance_type and any(
                 gpu_instance_family in optimization_instance_type
                 for gpu_instance_family in gpu_instance_families
             )
 
-
             is_llama_3_plus = self.model and bool(
                 re.search(r"llama-3[\.\-][1-9]\d*", self.model.lower())
             )
@@ -3335,7 +3898,6 @@ def _model_builder_optimize_wrapper(
         self.built_model = self._create_model()
         return self.built_model
 
-
     @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.deploy")
     def deploy(
         self,
@@ -3399,7 +3961,7 @@ def deploy(
             >>> endpoint = model_builder.deploy(endpoint_name="my-endpoint")  # Creates Endpoint resource
             >>> result = endpoint.invoke(data=input_data)  # Make predictions
         """
-        if hasattr(self, '_deployed') and self._deployed:
+        if hasattr(self, "_deployed") and self._deployed:
             logger.warning(
                 "ModelBuilder.deploy() has already been called. "
                 "Reusing ModelBuilder objects for multiple deployments is not recommended. "
@@ -3415,13 +3977,20 @@ def deploy(
             logger.info("Deploying Model Customization model")
             if not self.instance_type and not instance_type:
                 self.instance_type = self._fetch_default_instance_type_for_custom_model()
+
+            # Forward inference_config only when it is a ResourceRequirements object;
+            # other inference_config types are not used for model customization deployments.
+            inference_config_param = None
+            if isinstance(inference_config, ResourceRequirements):
+                inference_config_param = inference_config
+
             return self._deploy_model_customization(
                 endpoint_name=endpoint_name,
                 instance_type=instance_type or self.instance_type,
                 initial_instance_count=initial_instance_count,
                 wait=wait,
                 container_timeout_in_seconds=container_timeout_in_seconds,
-                **kwargs
+                inference_config=inference_config_param,
+                **kwargs,
             )
 
         if not update_endpoint:
@@ -3500,8 +4069,7 @@ def deploy(
             deploy_kwargs.update(kwargs)
             return self._deploy(**deploy_kwargs)
 
-
-        if hasattr(self, '_deployables') and self._deployables:
+        if hasattr(self, "_deployables") and self._deployables:
             endpoints = []
             for ic in self._deployables.get("InferenceComponents", []):
                 endpoints.append(self._deploy_for_ic(ic_data=ic, endpoint_name=endpoint_name))
@@ -3554,13 +4122,13 @@ def deploy(
 
         raise ValueError("Deployment Options not supported")
 
-
     def _deploy_model_customization(
         self,
         endpoint_name: str,
         initial_instance_count: int = 1,
         inference_component_name: Optional[str] = None,
-        **kwargs
+        inference_config: Optional[ResourceRequirements] = None,
+        **kwargs,
     ) -> Endpoint:
         """Deploy a model customization (fine-tuned) model to an endpoint with inference components.
 
@@ -3577,12 +4145,17 @@ def _deploy_model_customization(
             wait (bool): Whether to wait for deployment to complete (default: True)
             container_timeout_in_seconds (int): Container timeout in seconds (default: 300)
             inference_component_name (Optional[str]): Name for the inference component
+            inference_config (Optional[ResourceRequirements]): Inference configuration including
+                compute resource requirements (accelerator count, memory in MB, CPU cores)
             **kwargs: Additional deployment parameters
 
         Returns:
             Endpoint: The deployed sagemaker.core.resources.Endpoint
         """
-        from sagemaker.core.resources import Model as CoreModel, EndpointConfig as CoreEndpointConfig
+        from sagemaker.core.resources import (
+            Model as CoreModel,
+            EndpointConfig as CoreEndpointConfig,
+        )
         from sagemaker.core.shapes import ContainerDefinition, ProductionVariant
         from sagemaker.core.shapes import (
             InferenceComponentSpecification,
@@ -3590,7 +4163,7 @@ def _deploy_model_customization(
             InferenceComponentRuntimeConfig,
             InferenceComponentComputeResourceRequirements,
             ModelDataSource,
-            S3ModelDataSource
+            S3ModelDataSource,
         )
         from sagemaker.core.resources import InferenceComponent
         from sagemaker.core.utils.utils import Unassigned
@@ -3607,15 +4180,19 @@ def _deploy_model_customization(
         if not is_existing_endpoint:
             EndpointConfig.create(
                 endpoint_config_name=endpoint_name,
-                production_variants=[ProductionVariant(
-                    variant_name=endpoint_name,
-                    instance_type=self.instance_type,
-                    initial_instance_count=initial_instance_count or 1
-                )],
-                execution_role_arn=self.role_arn
+                production_variants=[
+                    ProductionVariant(
+                        variant_name=endpoint_name,
+                        instance_type=self.instance_type,
+                        initial_instance_count=initial_instance_count or 1,
+                    )
+                ],
+                execution_role_arn=self.role_arn,
             )
             logger.info("Endpoint core call starting")
-            endpoint = Endpoint.create(endpoint_name=endpoint_name, endpoint_config_name=endpoint_name)
+            endpoint = Endpoint.create(
+                endpoint_name=endpoint_name, endpoint_config_name=endpoint_name
+            )
             endpoint.wait_for_status("InService")
         else:
             endpoint = Endpoint.get(endpoint_name=endpoint_name)
@@ -3629,35 +4206,70 @@ def _deploy_model_customization(
 
         # Get PEFT type and base model recipe name
         peft_type = self._fetch_peft()
-        base_model_recipe_name = model_package.inference_specification.containers[0].base_model.recipe_name
+        base_model_recipe_name = model_package.inference_specification.containers[
+            0
+        ].base_model.recipe_name
         base_inference_component_name = None
         tag = None
 
+        # Resolve the correct model artifact URI based on deployment type
+        artifact_url = self._resolve_model_artifact_uri()
+
+        # Determine if this is a base model deployment
+        # A base model deployment uses HostingArtifactUri from JumpStart (not from model package)
+        is_base_model_deployment = False
+        if artifact_url and not peft_type:
+            # Check if artifact_url comes from JumpStart (not from model package)
+            # If model package has model_data_source, it's a full fine-tuned model
+            if (
+                hasattr(model_package.inference_specification.containers[0], "model_data_source")
+                and model_package.inference_specification.containers[0].model_data_source
+            ):
+                is_base_model_deployment = False  # Full fine-tuned model
+            else:
+                is_base_model_deployment = True  # Base model from JumpStart
+
         # Handle tagging and base component lookup
-        if not is_existing_endpoint:
+        if not is_existing_endpoint and is_base_model_deployment:
+            # Only tag as "Base" if we're actually deploying a base model
             from sagemaker.core.resources import Tag as CoreTag
+
             tag = CoreTag(key="Base", value=base_model_recipe_name)
         elif peft_type == "LORA":
+            # For LORA adapters, look up the existing base component
             from sagemaker.core.resources import Tag as CoreTag
-            for component in InferenceComponent.get_all(endpoint_name_equals=endpoint_name, status_equals="InService"):
+
+            for component in InferenceComponent.get_all(
+                endpoint_name_equals=endpoint_name, status_equals="InService"
+            ):
                 component_tags = CoreTag.get_all(resource_arn=component.inference_component_arn)
-                if any(t.key == "Base" and t.value == base_model_recipe_name for t in component_tags):
+                if any(
+                    t.key == "Base" and t.value == base_model_recipe_name for t in component_tags
+                ):
                     base_inference_component_name = component.inference_component_name
                     break
 
-        artifact_url = None #if peft_type == "LORA" else self._fetch_model_package().inference_specification.containers[0].model_data_source.s3_data_source.s3_uri
-
         ic_spec = InferenceComponentSpecification(
             container=InferenceComponentContainerSpecification(
-                image=self.image_uri,
-                artifact_url=artifact_url,
-                environment=self.env_vars
+                image=self.image_uri, artifact_url=artifact_url, environment=self.env_vars
             )
         )
 
         if peft_type == "LORA":
             ic_spec.base_inference_component_name = base_inference_component_name
-        ic_spec.compute_resource_requirements = self._cached_compute_requirements
+
+        # Use inference_config if provided, otherwise fall back to cached requirements
+        if inference_config is not None:
+            # Extract compute requirements from inference_config (ResourceRequirements)
+            ic_spec.compute_resource_requirements = InferenceComponentComputeResourceRequirements(
+                min_memory_required_in_mb=inference_config.min_memory,
+                max_memory_required_in_mb=inference_config.max_memory,
+                number_of_cpu_cores_required=inference_config.num_cpus,
+                number_of_accelerator_devices_required=inference_config.num_accelerators,
+            )
+        else:
+            # Fall back to resolved compute requirements from build()
+            ic_spec.compute_resource_requirements = self._cached_compute_requirements
 
         InferenceComponent.create(
             inference_component_name=inference_component_name,
@@ -3665,7 +4277,7 @@ def _deploy_model_customization(
             variant_name=endpoint_name,
             specification=ic_spec,
             runtime_config=InferenceComponentRuntimeConfig(copy_count=1),
-            tags=[{"key": tag.key, "value": tag.value}] if tag else []
+            tags=[{"key": tag.key, "value": tag.value}] if tag else [],
         )
 
         # Create lineage tracking for new endpoints
@@ -3673,15 +4285,20 @@ def _deploy_model_customization(
             from sagemaker.core.resources import Action, Association, Artifact
             from sagemaker.core.shapes import ActionSource, MetadataProperties
 
-            inference_component = InferenceComponent.get(inference_component_name=inference_component_name)
+            inference_component = InferenceComponent.get(
+                inference_component_name=inference_component_name
+            )
 
             action = Action.create(
-                source=ActionSource(source_uri=self._fetch_model_package_arn(),
-                                    source_type="SageMaker"),
+                source=ActionSource(
+                    source_uri=self._fetch_model_package_arn(), source_type="SageMaker"
+                ),
                 action_name=f"{endpoint_name}-action",
                 action_type="ModelDeployment",
                 properties={"EndpointConfigName": endpoint_name},
-                metadata_properties=MetadataProperties(generated_by=inference_component.inference_component_arn)
+                metadata_properties=MetadataProperties(
+                    generated_by=inference_component.inference_component_arn
+                ),
             )
 
             artifacts = Artifact.get_all(source_uri=model_package.model_package_arn)
@@ -3702,7 +4319,11 @@ def _fetch_peft(self) -> Optional[str]:
             return None
 
         from sagemaker.core.utils.utils import Unassigned
-        if training_job.serverless_job_config != Unassigned() and training_job.serverless_job_config.job_spec != Unassigned():
+
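+        # Read the PEFT type (e.g. "LORA") from the serverless training job spec,
+        # if one was recorded for this training job.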
+        if (
+            training_job.serverless_job_config != Unassigned()
+            and training_job.serverless_job_config.job_spec != Unassigned()
+        ):
             return training_job.serverless_job_config.job_spec.get("PEFT")
         return None
 
@@ -3712,24 +4333,21 @@ def _does_endpoint_exist(self, endpoint_name: str) -> bool:
             Endpoint.get(endpoint_name=endpoint_name)
             return True
         except ClientError as e:
-            if e.response['Error']['Code'] == 'ValidationException':
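+            # A ValidationException from DescribeEndpoint means the endpoint does not exist;
+            # re-raise any other client error.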
+            if e.response["Error"]["Code"] == "ValidationException":
                 return False
             raise
 
     @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.deploy_local")
     def deploy_local(
-        self,
-        endpoint_name: str = "endpoint", 
-        container_timeout_in_seconds: int = 300,
-        **kwargs
+        self, endpoint_name: str = "endpoint", container_timeout_in_seconds: int = 300, **kwargs
     ) -> LocalEndpoint:
         """Deploy the built model to local mode for testing.
-        
+
         Deploys the model locally using either LOCAL_CONTAINER mode (runs in a Docker container)
         or IN_PROCESS mode (runs in the current Python process). This is useful for testing and
         development before deploying to SageMaker endpoints. The model must be built with
         mode=Mode.LOCAL_CONTAINER or mode=Mode.IN_PROCESS before calling this method.
-        
+
         Note: This returns a ``LocalEndpoint`` object for local inference, not a SageMaker
         Endpoint resource. Use local_endpoint.invoke() to make predictions.
 
@@ -3739,10 +4357,10 @@ def deploy_local(
                 to respond to requests. (Default: 300).
         Returns:
             LocalEndpoint: A ``LocalEndpoint`` object for making local predictions.
-            
+
         Raises:
             ValueError: If the model was not built with LOCAL_CONTAINER or IN_PROCESS mode.
-            
+
         Example:
             >>> model_builder = ModelBuilder(
             ...     model=my_model,
@@ -3754,14 +4372,16 @@ def deploy_local(
             >>> result = local_endpoint.invoke(data=input_data)
         """
         if self.mode not in [Mode.LOCAL_CONTAINER, Mode.IN_PROCESS]:
-            raise ValueError(f"deploy_local() only supports LOCAL_CONTAINER and IN_PROCESS modes, got {self.mode}")
-        
+            raise ValueError(
+                f"deploy_local() only supports LOCAL_CONTAINER and IN_PROCESS modes, got {self.mode}"
+            )
+
         return self.deploy(
             endpoint_name=endpoint_name,
             container_timeout_in_seconds=container_timeout_in_seconds,
-            **kwargs
+            **kwargs,
         )
-    
+
     @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.register")
     @runnable_by_pipeline
     def register(
@@ -3847,7 +4467,7 @@ def register(
             A `sagemaker.model.ModelPackage` instance or pipeline step arguments
             in case the Model instance is built with
             :class:`~sagemaker.workflow.pipeline_context.PipelineSession`
-        
+
         Note:
             The following parameters are inherited from ModelBuilder.__init__ and do not need
             to be passed to register():
@@ -3905,7 +4525,7 @@ def register(
 
         # Ensure container_def_list is always a list
         container_def_list = container_def if isinstance(container_def, list) else [container_def]
-        
+
         model_pkg_args = get_model_package_args(
             self.content_types,
             self.response_types,
@@ -3932,8 +4552,7 @@ def register(
         )
 
         model_package_response = create_model_package_from_containers(
-            self.sagemaker_session,
-            **model_pkg_args
+            self.sagemaker_session, **model_pkg_args
         )
 
         if isinstance(self.sagemaker_session, PipelineSession):
@@ -3947,4 +4566,3 @@ def register(
         )
 
         return model_package_response.get("ModelPackageArn")
-
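As a usage sketch (not part of the change itself): the new inference_config handling in deploy() can be exercised roughly as follows for a model-customization deployment. The model id, model_metadata, and role ARN mirror the new unit test below and are placeholders; the ResourceRequirements import path and its requests/limits keys follow the classic SageMaker Python SDK and are assumptions here, while the min_memory/max_memory/num_cpus/num_accelerators attributes that _deploy_model_customization reads come from this change.

    # Illustrative sketch; import paths follow the classic SDK layout and may differ here.
    from sagemaker.serve.model_builder import ModelBuilder
    from sagemaker.serve.mode.function_pointers import Mode
    from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements

    # Compute budget for the inference component; the deploy path maps these attributes onto
    # InferenceComponentComputeResourceRequirements (memory values are in MB).
    requirements = ResourceRequirements(
        requests={"num_accelerators": 4, "num_cpus": 8, "memory": 16384},
        limits={"memory": 65536},
    )

    model_builder = ModelBuilder(
        model="huggingface-llm-mistral-7b",  # placeholder model id, as in the unit test
        model_metadata={
            "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
            "CUSTOM_MODEL_VERSION": "1.0.0",
        },
        mode=Mode.SAGEMAKER_ENDPOINT,
        instance_type="ml.g5.12xlarge",
        role_arn="arn:aws:iam::111122223333:role/ExampleRole",  # placeholder ARN
    )
    model_builder.build()

    # Passing a ResourceRequirements object as inference_config routes it into
    # _deploy_model_customization, which uses it instead of the cached requirements.
    endpoint = model_builder.deploy(
        endpoint_name="example-endpoint",
        inference_config=requirements,
    )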
diff --git a/sagemaker-serve/test_script.py b/sagemaker-serve/test_script.py
new file mode 100644
index 0000000000..1a88c2ec26
--- /dev/null
+++ b/sagemaker-serve/test_script.py
@@ -0,0 +1 @@
+print("Hello from processing script!")
diff --git a/sagemaker-serve/tests/unit/test_artifact_path_propagation.py b/sagemaker-serve/tests/unit/test_artifact_path_propagation.py
new file mode 100644
index 0000000000..89b511ab83
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_artifact_path_propagation.py
@@ -0,0 +1,408 @@
+"""
+Unit tests to verify artifact path propagation to the CreateInferenceComponent API.
+Tests that _resolve_model_artifact_uri is called and its result is used in deployment.
+
+Requirements: 4.3, 4.4
+Task: 5.4
+"""
+
+import unittest
+from unittest.mock import Mock, patch, MagicMock, call
+import pytest
+
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.serve.mode.function_pointers import Mode
+
+
+class TestArtifactPathPropagation(unittest.TestCase):
+    """Test artifact path propagation to CreateInferenceComponent - Requirements 4.3, 4.4"""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._resolve_model_artifact_uri")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_base_model_artifact_uri_propagated_to_inference_component(
+        self,
+        mock_is_model_customization,
+        mock_fetch_peft,
+        mock_resolve_artifact,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_ic_create,
+    ):
+        """Test that base model artifact URI is propagated to InferenceComponent.create."""
+        # Setup: Model customization deployment
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Artifact URI resolution returns JumpStart HostingArtifactUri
+        expected_artifact_uri = "s3://jumpstart-bucket/base-model/artifacts.tar.gz"
+        mock_resolve_artifact.return_value = expected_artifact_uri
+
+        # Setup: Model package
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        # Setup: Endpoint mocks
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Create ModelBuilder
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Mark as built with cached compute requirements
+        builder.built_model = Mock()
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=16384,
+            number_of_cpu_cores_required=8.0,
+            number_of_accelerator_devices_required=4.0,
+        )
+
+        # Execute: Deploy
+        builder._deploy_model_customization(endpoint_name="test-endpoint", initial_instance_count=1)
+
+        # Verify: _resolve_model_artifact_uri was called
+        assert mock_resolve_artifact.called
+
+        # Verify: InferenceComponent.create was called with correct artifact_url
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+
+        # Extract the specification
+        ic_spec = call_kwargs["specification"]
+
+        # Verify artifact_url matches the resolved URI
+        assert ic_spec.container.artifact_url == expected_artifact_uri
+
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._resolve_model_artifact_uri")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_fine_tuned_model_artifact_uri_propagated_to_inference_component(
+        self,
+        mock_is_model_customization,
+        mock_fetch_peft,
+        mock_resolve_artifact,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_ic_create,
+    ):
+        """Test that fine-tuned model artifact URI is propagated to InferenceComponent.create."""
+        # Setup: Model customization deployment
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Artifact URI resolution returns None for fine-tuned models
+        mock_resolve_artifact.return_value = None
+
+        # Setup: Model package
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        # Setup: Endpoint mocks
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Create ModelBuilder
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Mark as built with cached compute requirements
+        builder.built_model = Mock()
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=16384,
+            number_of_cpu_cores_required=8.0,
+            number_of_accelerator_devices_required=4.0,
+        )
+
+        # Execute: Deploy
+        builder._deploy_model_customization(endpoint_name="test-endpoint", initial_instance_count=1)
+
+        # Verify: _resolve_model_artifact_uri was called
+        assert mock_resolve_artifact.called
+
+        # Verify: InferenceComponent.create was called with correct artifact_url
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+
+        # Extract the specification
+        ic_spec = call_kwargs["specification"]
+
+        # Verify artifact_url is None for fine-tuned models (model data handled by recipe)
+        assert ic_spec.container.artifact_url is None
+
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    @patch("sagemaker.core.resources.Tag.get_all")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._resolve_model_artifact_uri")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_lora_adapter_no_artifact_uri_propagated(
+        self,
+        mock_is_model_customization,
+        mock_fetch_peft,
+        mock_resolve_artifact,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_get,
+        mock_tag_get_all,
+        mock_ic_get_all,
+        mock_ic_create,
+    ):
+        """Test that LORA adapters have None artifact_url (no artifact needed)."""
+        # Setup: Model customization deployment with LORA adapter
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "LORA"
+
+        # Setup: Artifact URI resolution returns None for LORA
+        mock_resolve_artifact.return_value = None
+
+        # Setup: Model package
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        # Setup: Existing endpoint with base component
+        mock_endpoint = Mock()
+        mock_endpoint_get.return_value = mock_endpoint
+
+        # Setup: Base inference component
+        mock_base_component = Mock()
+        mock_base_component.inference_component_name = "base-component"
+        mock_base_component.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/base"
+        )
+        mock_ic_get_all.return_value = [mock_base_component]
+
+        # Setup: Tags for base component
+        mock_tag = Mock()
+        mock_tag.key = "Base"
+        mock_tag.value = "test-recipe"
+        mock_tag_get_all.return_value = [mock_tag]
+
+        # Create ModelBuilder
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Mark as built with cached compute requirements
+        builder.built_model = Mock()
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=16384,
+            number_of_cpu_cores_required=8.0,
+            number_of_accelerator_devices_required=4.0,
+        )
+
+        # Execute: Deploy to existing endpoint (LORA adapter)
+        builder._deploy_model_customization(endpoint_name="test-endpoint", initial_instance_count=1)
+
+        # Verify: _resolve_model_artifact_uri was called
+        assert mock_resolve_artifact.called
+
+        # Verify: InferenceComponent.create was called with artifact_url=None
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+
+        # Extract the specification
+        ic_spec = call_kwargs["specification"]
+
+        # Verify artifact_url is None for LORA adapters
+        assert ic_spec.container.artifact_url is None
+
+        # Verify base_inference_component_name is set
+        assert ic_spec.base_inference_component_name == "base-component"
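+
+        # Note: the base component is assumed to be discovered by matching the
+        # "Base" tag value against the model package's recipe name, which is
+        # why the mocked tag above reuses "test-recipe".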
+
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._resolve_model_artifact_uri")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_environment_variables_propagated_with_artifact_path(
+        self,
+        mock_is_model_customization,
+        mock_fetch_peft,
+        mock_resolve_artifact,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_ic_create,
+    ):
+        """Test that environment variables are propagated along with artifact path."""
+        # Setup: Model customization deployment
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Artifact URI resolution
+        expected_artifact_uri = "s3://jumpstart-bucket/base-model/artifacts.tar.gz"
+        mock_resolve_artifact.return_value = expected_artifact_uri
+
+        # Setup: Model package
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        # Setup: Endpoint mocks
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Create ModelBuilder with custom environment variables
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+            env_vars={"CUSTOM_VAR": "custom_value", "MODEL_TIMEOUT": "300"},
+        )
+
+        # Mark as built with cached compute requirements
+        builder.built_model = Mock()
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=16384,
+            number_of_cpu_cores_required=8.0,
+            number_of_accelerator_devices_required=4.0,
+        )
+
+        # Execute: Deploy
+        builder._deploy_model_customization(endpoint_name="test-endpoint", initial_instance_count=1)
+
+        # Verify: InferenceComponent.create was called
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+
+        # Extract the specification
+        ic_spec = call_kwargs["specification"]
+
+        # Verify both artifact_url and environment variables are set
+        assert ic_spec.container.artifact_url == expected_artifact_uri
+        assert ic_spec.container.environment == builder.env_vars
+        assert ic_spec.container.environment["CUSTOM_VAR"] == "custom_value"
+        assert ic_spec.container.environment["MODEL_TIMEOUT"] == "300"
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sagemaker-serve/tests/unit/test_artifact_path_resolution.py b/sagemaker-serve/tests/unit/test_artifact_path_resolution.py
new file mode 100644
index 0000000000..9fe4132957
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_artifact_path_resolution.py
@@ -0,0 +1,485 @@
+"""
+Unit tests for ModelBuilder artifact path resolution.
+Tests the _resolve_model_artifact_uri method with various scenarios.
+
+Requirements: 7.3
+"""
+
+import unittest
+from unittest.mock import Mock, patch
+
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.serve.mode.function_pointers import Mode
+
+
+class TestArtifactPathResolution(unittest.TestCase):
+    """Test artifact path resolution - Requirements 7.3"""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_base_model_artifact_uri_retrieval(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package, mock_fetch_hub
+    ):
+        """Test base model artifact URI retrieval from JumpStart metadata."""
+        # Setup: Base model (not LORA, not full fine-tuned)
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"  # Not LORA
+
+        # Setup: Model package with base_model but no model_data_source
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        # No model_data_source attribute (base model)
+        mock_container.model_data_source = None
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document with HostingArtifactUri
+        mock_fetch_hub.return_value = {
+            "HostingArtifactUri": "s3://jumpstart-bucket/base-model/artifacts.tar.gz"
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return HostingArtifactUri from JumpStart
+        assert artifact_uri == "s3://jumpstart-bucket/base-model/artifacts.tar.gz"
+        assert mock_fetch_hub.called
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_fine_tuned_model_artifact_location(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package
+    ):
+        """Test fine-tuned model returns None (model data handled by recipe/container)."""
+        # Setup: Full fine-tuned model (not LORA)
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Model package with model_data_source (fine-tuned model)
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_s3_data_source = Mock()
+        mock_s3_data_source.s3_uri = "s3://my-bucket/fine-tuned-model/model.tar.gz"
+        mock_model_data_source = Mock()
+        mock_model_data_source.s3_data_source = mock_s3_data_source
+        mock_container.model_data_source = mock_model_data_source
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Fine-tuned models return None - model data is handled by recipe/container
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_lora_adapter_returns_none(self, mock_is_model_customization, mock_fetch_peft):
+        """Test that LORA adapters return None (no artifact URI needed)."""
+        # Setup: LORA adapter
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "LORA"
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None for LORA adapters
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_missing_hosting_artifact_uri_returns_none(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package, mock_fetch_hub
+    ):
+        """Test error handling when HostingArtifactUri is missing from metadata."""
+        # Setup: Base model without HostingArtifactUri
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Model package with base_model but no model_data_source
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_container.model_data_source = None
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document WITHOUT HostingArtifactUri
+        mock_fetch_hub.return_value = {}
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None and log warning
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_hub_document_fetch_exception_handling(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package, mock_fetch_hub
+    ):
+        """Test error handling when hub document fetch fails."""
+        # Setup: Base model
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Model package with base_model
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_container.model_data_source = None
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document fetch raises exception
+        mock_fetch_hub.side_effect = Exception("Hub service unavailable")
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute - should not raise exception
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None and log warning
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_non_model_customization_returns_none(self, mock_is_model_customization):
+        """Test that non-model-customization deployments return None."""
+        # Setup: Not a model customization deployment
+        mock_is_model_customization.return_value = False
+
+        builder = ModelBuilder(
+            model="my-model",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",  # Provide instance_type to avoid auto-detection
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None for non-model-customization
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_missing_model_package_returns_none(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package
+    ):
+        """Test error handling when model package is not available."""
+        # Setup: Model customization but no model package
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+        mock_fetch_package.return_value = None
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None when model package is unavailable
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_missing_inference_specification_returns_none(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package
+    ):
+        """Test error handling when model package has no inference specification."""
+        # Setup: Model package without inference_specification
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_package = Mock()
+        mock_package.inference_specification = None
+        mock_fetch_package.return_value = mock_package
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None when inference_specification is missing
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_empty_containers_list_returns_none(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package
+    ):
+        """Test error handling when containers list is empty."""
+        # Setup: Model package with empty containers list
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_package = Mock()
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = []
+        mock_fetch_package.return_value = mock_package
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None when containers list is empty
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_base_model_without_base_model_attribute_returns_none(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package, mock_fetch_hub
+    ):
+        """Test error handling when container has no base_model attribute."""
+        # Setup: Container without base_model attribute
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.model_data_source = None
+        mock_container.base_model = None  # No base_model
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return None when base_model is not present
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_fine_tuned_model_with_nested_s3_data_source(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package
+    ):
+        """Test fine-tuned model with nested s3_data_source returns None."""
+        # Setup: Full fine-tuned model with nested structure
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Properly nested model_data_source structure
+        mock_package = Mock()
+        mock_container = Mock()
+
+        mock_s3_data_source = Mock()
+        mock_s3_data_source.s3_uri = "s3://custom-bucket/my-fine-tuned-model/artifacts.tar.gz"
+
+        mock_model_data_source = Mock()
+        mock_model_data_source.s3_data_source = mock_s3_data_source
+
+        mock_container.model_data_source = mock_model_data_source
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Fine-tuned models return None - model data handled by recipe/container
+        assert artifact_uri is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    def test_base_model_with_additional_hub_document_fields(
+        self, mock_is_model_customization, mock_fetch_peft, mock_fetch_package, mock_fetch_hub
+    ):
+        """Test base model retrieval when the hub document has extra hosting fields."""
+        # Setup: Base model
+        mock_is_model_customization.return_value = True
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Model package with base_model
+        mock_package = Mock()
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-recipe"
+        mock_container.model_data_source = None
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document with HostingArtifactUri
+        mock_fetch_hub.return_value = {
+            "HostingArtifactUri": "s3://jumpstart-cache/base-model-v2/model.tar.gz",
+            "HostingEcrUri": "123456789012.dkr.ecr.us-west-2.amazonaws.com/jumpstart:latest",
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        artifact_uri = builder._resolve_model_artifact_uri()
+
+        # Verify: Should return HostingArtifactUri
+        assert artifact_uri == "s3://jumpstart-cache/base-model-v2/model.tar.gz"
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sagemaker-serve/tests/unit/test_compute_requirements_resolution.py b/sagemaker-serve/tests/unit/test_compute_requirements_resolution.py
new file mode 100644
index 0000000000..8d1bba8aff
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_compute_requirements_resolution.py
@@ -0,0 +1,984 @@
+"""
+Unit tests for ModelBuilder compute requirements resolution.
+Tests the _resolve_compute_requirements method with various scenarios.
+"""
+
+import unittest
+from unittest.mock import Mock, patch, MagicMock
+import pytest
+
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.serve.mode.function_pointers import Mode
+from sagemaker.core.inference_config import ResourceRequirements
+from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+
+class TestComputeRequirementsResolution(unittest.TestCase):
+    """Test compute requirements resolution - Requirements 2.1, 3.1, 3.2, 3.4"""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_resolve_with_defaults_only(self, mock_get_resources, mock_fetch_hub):
+        """Test resolving compute requirements with only JumpStart defaults."""
+        # Setup: Hub document with default compute requirements
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)  # 8 CPUs, 32GB RAM
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.m5.2xlarge", user_resource_requirements=None
+        )
+
+        # Verify: Should use safe default memory (1024), CPUs from metadata
+        assert requirements.number_of_cpu_cores_required == 4
+        assert requirements.min_memory_required_in_mb == 1024
+        # Check that accelerator count is not set (should be Unassigned)
+        from sagemaker.core.utils.utils import Unassigned
+
+        assert isinstance(requirements.number_of_accelerator_devices_required, Unassigned)
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_resolve_with_user_override(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test that user-provided requirements take precedence over defaults."""
+        # Setup: Mock GPU detection for g5.12xlarge
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = 4
+
+        # Setup: Hub document with defaults
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User provides custom requirements
+        user_requirements = ResourceRequirements(
+            requests={"num_cpus": 8, "memory": 16384, "num_accelerators": 2},
+            limits={"memory": 32768},
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.g5.12xlarge", user_resource_requirements=user_requirements
+        )
+
+        # Verify: Should use user-provided values
+        assert requirements.number_of_cpu_cores_required == 8
+        assert requirements.min_memory_required_in_mb == 16384
+        assert requirements.max_memory_required_in_mb == 32768
+        assert requirements.number_of_accelerator_devices_required == 2
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_resolve_with_partial_user_override(self, mock_get_resources, mock_fetch_hub):
+        """Test merging user requirements with defaults (partial override)."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User only overrides memory
+        user_requirements = ResourceRequirements(requests={"memory": 16384})
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.m5.2xlarge", user_resource_requirements=user_requirements
+        )
+
+        # Verify: Should use user memory, default CPUs
+        assert requirements.number_of_cpu_cores_required == 4  # From default
+        assert requirements.min_memory_required_in_mb == 16384  # From user
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_infer_accelerator_count_for_gpu_instance(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test automatic accelerator count inference for GPU instances."""
+        # Setup: Mock GPU detection for g5.12xlarge
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = 4
+
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                        # No accelerator count specified
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (48, 196608)  # g5.12xlarge specs
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.g5.12xlarge", user_resource_requirements=None
+        )
+
+        # Verify: Should automatically infer 4 GPUs for g5.12xlarge
+        assert requirements.number_of_accelerator_devices_required == 4
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_error_when_gpu_instance_accelerator_count_unknown(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test error when GPU instance type has unknown accelerator count."""
+        # Setup: Mock GPU detection - GPU instance but unknown count
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = None
+
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute with a GPU-pattern instance type; the accelerator-count lookup
+        # is mocked above to return None (unknown count)
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.g5.metal",  # GPU pattern with unknown accelerator count
+                user_resource_requirements=None,
+            )
+
+        # Verify error message
+        error_msg = str(exc_info.value)
+        assert "requires accelerator device count specification" in error_msg
+        assert "ResourceRequirements" in error_msg
+        assert "num_accelerators" in error_msg
+
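+    # For reference, the remedy the error message points at looks roughly like
+    # this (a hedged usage sketch, mirroring the ResourceRequirements calls
+    # used elsewhere in this module):
+    #
+    #     builder._resolve_compute_requirements(
+    #         instance_type="ml.g5.metal",
+    #         user_resource_requirements=ResourceRequirements(
+    #             requests={"num_cpus": 4, "memory": 8192, "num_accelerators": 1}
+    #         ),
+    #     )
+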
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_validate_cpu_requirements_exceed_instance_capacity(
+        self, mock_get_resources, mock_fetch_hub
+    ):
+        """Test validation error when CPU requirements exceed instance capacity."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (2, 8192)  # Only 2 CPUs available
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User requests more CPUs than available
+        user_requirements = ResourceRequirements(
+            requests={"num_cpus": 16, "memory": 8192}  # More than available
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.t3.small", user_resource_requirements=user_requirements
+            )
+
+        error_msg = str(exc_info.value)
+        assert "Resource requirements incompatible" in error_msg
+        assert "16 CPUs" in error_msg
+        assert "2 CPUs" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_validate_memory_requirements_exceed_instance_capacity(
+        self, mock_get_resources, mock_fetch_hub
+    ):
+        """Test validation error when memory requirements exceed instance capacity."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 8192)  # Only 8GB RAM
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User requests more memory than available
+        user_requirements = ResourceRequirements(
+            requests={"num_cpus": 4, "memory": 32768}  # More than available
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.m5.large", user_resource_requirements=user_requirements
+            )
+
+        error_msg = str(exc_info.value)
+        assert "Resource requirements incompatible" in error_msg
+        assert "32768 MB memory" in error_msg
+        assert "8192 MB memory" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_adjust_cpu_count_when_default_exceeds_capacity(
+        self, mock_get_resources, mock_fetch_hub
+    ):
+        """Test automatic CPU adjustment when default exceeds instance capacity."""
+        # Setup: Default requests 8 CPUs but instance only has 4
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 8,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (4, 16384)  # Only 4 CPUs
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",  # Provide instance_type to avoid auto-detection
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.m5.xlarge", user_resource_requirements=None
+        )
+
+        # Verify: Should adjust to instance capacity
+        assert requirements.number_of_cpu_cores_required == 4
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_default_memory_uses_safe_minimum(self, mock_get_resources, mock_fetch_hub):
+        """Test that the default memory is the safe 1024 MB minimum, regardless of metadata."""
+        # Setup: Metadata requests 32GB but we use safe default of 1024
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 32768,  # This is ignored for defaults
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 8192)  # Only 8GB RAM
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.large",
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.m5.large", user_resource_requirements=None
+        )
+
+        # Verify: Uses safe default of 1024, not metadata value
+        assert requirements.min_memory_required_in_mb == 1024
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_missing_hosting_configs_error(self, mock_get_resources, mock_fetch_hub):
+        """Test error when hub document has no hosting configs."""
+        # Setup: No hosting configs
+        mock_fetch_hub.return_value = {}
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.m5.xlarge", user_resource_requirements=None
+            )
+
+        error_msg = str(exc_info.value)
+        assert "Unable to resolve compute requirements" in error_msg
+        assert "does not have hosting configuration" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_various_gpu_instance_types(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test accelerator count inference for various GPU instance types."""
+        # Setup: Mock GPU detection
+        mock_is_gpu.return_value = True
+        gpu_count_map = {
+            "ml.g5.xlarge": 1,
+            "ml.g5.12xlarge": 4,
+            "ml.g5.48xlarge": 8,
+            "ml.p3.2xlarge": 1,
+            "ml.p3.8xlarge": 4,
+            "ml.p4d.24xlarge": 8,
+            "ml.g4dn.xlarge": 1,
+            "ml.g4dn.12xlarge": 4,
+        }
+        mock_infer_accel.side_effect = lambda it: gpu_count_map.get(it)
+
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Test various GPU instance types
+        test_cases = [
+            ("ml.g5.xlarge", 1),
+            ("ml.g5.12xlarge", 4),
+            ("ml.g5.48xlarge", 8),
+            ("ml.p3.2xlarge", 1),
+            ("ml.p3.8xlarge", 4),
+            ("ml.p4d.24xlarge", 8),
+            ("ml.g4dn.xlarge", 1),
+            ("ml.g4dn.12xlarge", 4),
+        ]
+
+        for instance_type, expected_gpus in test_cases:
+            mock_get_resources.return_value = (8, 32768)
+
+            requirements = builder._resolve_compute_requirements(
+                instance_type=instance_type, user_resource_requirements=None
+            )
+
+            assert (
+                requirements.number_of_accelerator_devices_required == expected_gpus
+            ), f"Expected {expected_gpus} GPUs for {instance_type}, got {requirements.number_of_accelerator_devices_required}"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_cpu_instance_no_accelerator_count(self, mock_get_resources, mock_fetch_hub):
+        """Test that CPU instances don't get accelerator count."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute with CPU instance
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.m5.2xlarge", user_resource_requirements=None
+        )
+
+        # Verify: Should not have accelerator count
+        from sagemaker.core.utils.utils import Unassigned
+
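+        # Unassigned is used as the sentinel for fields that were never set, so a CPU
+        # instance should leave the accelerator count unset rather than defaulting it to 0.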
+        assert isinstance(requirements.number_of_accelerator_devices_required, Unassigned)
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_default_accelerator_count_from_metadata(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test using default accelerator count from JumpStart metadata."""
+        # Setup: Mock GPU detection for g5.12xlarge
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = 4
+
+        # Setup: Metadata includes accelerator count
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                        "NumberOfAcceleratorDevicesRequired": 2,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.g5.12xlarge", user_resource_requirements=None
+        )
+
+        # Verify: Should use metadata value, not inferred value
+        assert requirements.number_of_accelerator_devices_required == 2
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_missing_accelerator_count_for_unknown_gpu_instance(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test error when GPU instance type has no accelerator count in metadata or mapping."""
+        # Setup: Mock GPU detection - GPU instance but unknown count
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = None
+
+        # Setup: No accelerator count in metadata
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                        # No NumberOfAcceleratorDevicesRequired
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute with a GPU instance not in the mapping
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.g5.unknown", user_resource_requirements=None  # Not in mapping
+            )
+
+        # Verify error message provides guidance
+        error_msg = str(exc_info.value)
+        assert "requires accelerator device count specification" in error_msg
+        assert "ResourceRequirements" in error_msg
+        assert "num_accelerators" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_incompatible_accelerator_requirements(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test validation when user requests more accelerators than available."""
+        # Setup: Mock GPU detection for g5.xlarge (1 GPU)
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = 1
+
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User requests more accelerators than ml.g5.xlarge has (1 GPU)
+        user_requirements = ResourceRequirements(
+            requests={"num_accelerators": 8, "memory": 8192}  # More than available
+        )
+
+        # Execute - should succeed (with a warning): the accelerator count is not validated
+        # against the instance type here; AWS validates it at deployment time.
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.g5.xlarge", user_resource_requirements=user_requirements
+        )
+
+        # Verify: Should use user-provided accelerator count
+        assert requirements.number_of_accelerator_devices_required == 8
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_validation_error_message_format_cpu(self, mock_get_resources, mock_fetch_hub):
+        """Test that CPU validation error messages are properly formatted."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (4, 16384)  # 4 CPUs available
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User requests more CPUs than available
+        user_requirements = ResourceRequirements(requests={"num_cpus": 16, "memory": 8192})
+
+        # Execute and verify error message format
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.m5.xlarge", user_resource_requirements=user_requirements
+            )
+
+        error_msg = str(exc_info.value)
+        # Verify error message contains all required information
+        assert "Resource requirements incompatible" in error_msg
+        assert "ml.m5.xlarge" in error_msg
+        assert "Requested: 16 CPUs" in error_msg
+        assert "Available: 4 CPUs" in error_msg
+        assert "reduce CPU requirements" in error_msg or "larger instance type" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_validation_error_message_format_memory(self, mock_get_resources, mock_fetch_hub):
+        """Test that memory validation error messages are properly formatted."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 16384)  # 16GB available
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User requests more memory than available
+        user_requirements = ResourceRequirements(
+            requests={"num_cpus": 4, "memory": 65536}  # 64GB requested, only 16GB available
+        )
+
+        # Execute and verify error message format
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.m5.2xlarge", user_resource_requirements=user_requirements
+            )
+
+        error_msg = str(exc_info.value)
+        # Verify error message contains all required information
+        assert "Resource requirements incompatible" in error_msg
+        assert "ml.m5.2xlarge" in error_msg
+        assert "Requested: 65536 MB memory" in error_msg
+        assert "Available: 16384 MB memory" in error_msg
+        assert "reduce memory requirements" in error_msg or "larger instance type" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_accelerator_error_message_includes_example_code(
+        self, mock_get_resources, mock_fetch_hub, mock_is_gpu, mock_infer_accel
+    ):
+        """Test that accelerator count error includes example code snippet."""
+        # Setup: Mock GPU detection - GPU instance but unknown count
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = None
+
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute with unknown GPU instance
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.g5.custom", user_resource_requirements=None  # Not in mapping
+            )
+
+        error_msg = str(exc_info.value)
+        # Verify error message includes example code
+        assert "ResourceRequirements" in error_msg
+        assert "num_accelerators" in error_msg
+        assert "requests" in error_msg
+        # Should show how to create ResourceRequirements
+        assert "from sagemaker.core.inference_config import ResourceRequirements" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_both_cpu_and_memory_incompatible(self, mock_get_resources, mock_fetch_hub):
+        """Test error when both CPU and memory requirements exceed capacity."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (2, 4096)  # Small instance
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User requests more than available
+        user_requirements = ResourceRequirements(
+            requests={
+                "num_cpus": 8,  # More than 2 available
+                "memory": 16384,  # More than 4096 available
+            }
+        )
+
+        # Execute - should fail on CPU first (checked first in code)
+        with pytest.raises(ValueError) as exc_info:
+            builder._resolve_compute_requirements(
+                instance_type="ml.t3.small", user_resource_requirements=user_requirements
+            )
+
+        error_msg = str(exc_info.value)
+        # Should report CPU incompatibility (checked first)
+        assert "Resource requirements incompatible" in error_msg
+        assert "CPUs" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_zero_accelerator_count_explicit(self, mock_get_resources, mock_fetch_hub):
+        """Test that explicitly setting 0 accelerators on CPU instance is stripped."""
+        # Setup
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (8, 32768)
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # User explicitly sets 0 accelerators on a CPU instance
+        user_requirements = ResourceRequirements(
+            requests={"num_accelerators": 0, "num_cpus": 4, "memory": 8192}
+        )
+
+        # Execute
+        requirements = builder._resolve_compute_requirements(
+            instance_type="ml.m5.2xlarge", user_resource_requirements=user_requirements
+        )
+
+        # Verify: Accelerator count is stripped for CPU instances
+        from sagemaker.core.utils.utils import Unassigned
+
+        assert isinstance(requirements.number_of_accelerator_devices_required, Unassigned)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sagemaker-serve/tests/unit/test_deploy_passes_inference_config.py b/sagemaker-serve/tests/unit/test_deploy_passes_inference_config.py
new file mode 100644
index 0000000000..c3b3c480ed
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_deploy_passes_inference_config.py
@@ -0,0 +1,200 @@
+"""
+Test to verify that deploy() method passes inference_config to _deploy_model_customization.
+This test validates task 4.4 requirements.
+"""
+
+import unittest
+from unittest.mock import Mock, patch, MagicMock
+import pytest
+
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.serve.mode.function_pointers import Mode
+from sagemaker.core.inference_config import ResourceRequirements
+
+
+class TestDeployPassesInferenceConfig(unittest.TestCase):
+    """Test that deploy() correctly passes inference_config to _deploy_model_customization."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._deploy_model_customization")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    @patch(
+        "sagemaker.serve.model_builder.ModelBuilder._fetch_default_instance_type_for_custom_model"
+    )
+    def test_deploy_passes_inference_config_to_deploy_model_customization(
+        self,
+        mock_fetch_default_instance,
+        mock_is_model_customization,
+        mock_deploy_model_customization,
+    ):
+        """Test that deploy() passes inference_config parameter to _deploy_model_customization."""
+        # Setup: Mock model customization check
+        mock_is_model_customization.return_value = True
+        mock_fetch_default_instance.return_value = "ml.g5.12xlarge"
+
+        # Setup: Mock _deploy_model_customization to return a mock endpoint
+        mock_endpoint = Mock()
+        mock_deploy_model_customization.return_value = mock_endpoint
+
+        # Create ModelBuilder
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Mark as built
+        builder.built_model = Mock()
+
+        # Create inference_config
+        inference_config = ResourceRequirements(
+            requests={"num_cpus": 8, "memory": 16384, "num_accelerators": 4}
+        )
+
+        # Execute: Call deploy() with inference_config
+        result = builder.deploy(
+            endpoint_name="test-endpoint",
+            inference_config=inference_config,
+            initial_instance_count=1,
+            wait=True,
+        )
+
+        # Verify: _deploy_model_customization was called with inference_config
+        assert mock_deploy_model_customization.called
+        call_kwargs = mock_deploy_model_customization.call_args[1]
+
+        # Verify inference_config was passed through
+        assert "inference_config" in call_kwargs
+        assert call_kwargs["inference_config"] == inference_config
+
+        # Verify other parameters were also passed
+        assert call_kwargs["endpoint_name"] == "test-endpoint"
+        assert call_kwargs["initial_instance_count"] == 1
+        assert call_kwargs["wait"] == True
+
+        # Verify the result is the mock endpoint
+        assert result == mock_endpoint
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._deploy_model_customization")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    @patch(
+        "sagemaker.serve.model_builder.ModelBuilder._fetch_default_instance_type_for_custom_model"
+    )
+    def test_deploy_passes_none_when_inference_config_not_provided(
+        self,
+        mock_fetch_default_instance,
+        mock_is_model_customization,
+        mock_deploy_model_customization,
+    ):
+        """Test backward compatibility: deploy() passes None when inference_config not provided."""
+        # Setup
+        mock_is_model_customization.return_value = True
+        mock_fetch_default_instance.return_value = "ml.g5.12xlarge"
+        mock_endpoint = Mock()
+        mock_deploy_model_customization.return_value = mock_endpoint
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        builder.built_model = Mock()
+
+        # Execute: Call deploy() WITHOUT inference_config
+        result = builder.deploy(endpoint_name="test-endpoint", initial_instance_count=1)
+
+        # Verify: _deploy_model_customization was called with inference_config=None
+        assert mock_deploy_model_customization.called
+        call_kwargs = mock_deploy_model_customization.call_args[1]
+
+        # Verify inference_config is None (backward compatibility)
+        assert "inference_config" in call_kwargs
+        assert call_kwargs["inference_config"] is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._deploy_model_customization")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_model_customization")
+    @patch(
+        "sagemaker.serve.model_builder.ModelBuilder._fetch_default_instance_type_for_custom_model"
+    )
+    def test_deploy_only_passes_resource_requirements_type(
+        self,
+        mock_fetch_default_instance,
+        mock_is_model_customization,
+        mock_deploy_model_customization,
+    ):
+        """Test that deploy() only passes inference_config if it's ResourceRequirements type."""
+        # Setup
+        mock_is_model_customization.return_value = True
+        mock_fetch_default_instance.return_value = "ml.g5.12xlarge"
+        mock_endpoint = Mock()
+        mock_deploy_model_customization.return_value = mock_endpoint
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        builder.built_model = Mock()
+
+        # Create a non-ResourceRequirements inference_config (e.g., ServerlessInferenceConfig)
+        from sagemaker.core.inference_config import ServerlessInferenceConfig
+
+        serverless_config = ServerlessInferenceConfig(memory_size_in_mb=4096, max_concurrency=10)
+
+        # Execute: Call deploy() with ServerlessInferenceConfig
+        # This should NOT pass it to _deploy_model_customization
+        result = builder.deploy(endpoint_name="test-endpoint", inference_config=serverless_config)
+
+        # Verify: _deploy_model_customization was called with inference_config=None
+        # because ServerlessInferenceConfig is not ResourceRequirements
+        assert mock_deploy_model_customization.called
+        call_kwargs = mock_deploy_model_customization.call_args[1]
+
+        # Verify inference_config is None (not ServerlessInferenceConfig)
+        assert "inference_config" in call_kwargs
+        assert call_kwargs["inference_config"] is None
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sagemaker-serve/tests/unit/test_inference_config_parameter_handling.py b/sagemaker-serve/tests/unit/test_inference_config_parameter_handling.py
new file mode 100644
index 0000000000..36f312e7f8
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_inference_config_parameter_handling.py
@@ -0,0 +1,769 @@
+"""
+Unit tests for ModelBuilder inference_config parameter handling.
+Tests the _deploy_model_customization method with inference_config parameter.
+
+Requirements: 2.3, 2.4, 2.5
+"""
+
+import unittest
+from unittest.mock import Mock, patch, MagicMock, call
+import pytest
+
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.serve.mode.function_pointers import Mode
+from sagemaker.core.compute_resource_requirements.resource_requirements import ResourceRequirements
+from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+
+class TestInferenceConfigParameterHandling(unittest.TestCase):
+    """Test inference_config parameter handling in deployment - Requirements 2.3, 2.4, 2.5"""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.Action.create")
+    @patch("sagemaker.core.resources.Artifact.get_all")
+    @patch("sagemaker.core.resources.Association.add")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    def test_inference_config_provided_all_fields(
+        self,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_endpoint_create,
+        mock_endpoint_config_create,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+        mock_fetch_package_arn,
+        mock_association_add,
+        mock_artifact_get_all,
+        mock_action_create,
+        mock_ic_get,
+    ):
+        """Test deployment with inference_config containing all ResourceRequirements fields."""
+        # Setup: Mock model package
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Mock endpoint doesn't exist (new deployment)
+        mock_does_endpoint_exist.return_value = False
+        mock_fetch_peft.return_value = "FULL"
+
+        # Setup: Mock endpoint creation
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Setup: Mock InferenceComponent.get for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Setup: Mock lineage tracking
+        mock_fetch_package_arn.return_value = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-west-2:123456789012:artifact/test"
+        mock_artifact_get_all.return_value = [mock_artifact]
+
+        # Create ModelBuilder
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # User provides inference_config with all fields
+        inference_config = ResourceRequirements(
+            requests={"num_cpus": 8, "memory": 16384, "num_accelerators": 4},
+            limits={"memory": 32768},
+        )
+
+        # Execute
+        builder._deploy_model_customization(
+            endpoint_name="test-endpoint", inference_config=inference_config
+        )
+
+        # Verify: InferenceComponent.create was called with correct compute requirements
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+
+        # Extract compute requirements from the specification
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify all fields are present
+        assert compute_reqs.number_of_cpu_cores_required == 8
+        assert compute_reqs.min_memory_required_in_mb == 16384
+        assert compute_reqs.max_memory_required_in_mb == 32768
+        assert compute_reqs.number_of_accelerator_devices_required == 4
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.Action.create")
+    @patch("sagemaker.core.resources.Artifact.get_all")
+    @patch("sagemaker.core.resources.Association.add")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    def test_inference_config_provided_partial_fields(
+        self,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_endpoint_create,
+        mock_endpoint_config_create,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+        mock_fetch_package_arn,
+        mock_association_add,
+        mock_artifact_get_all,
+        mock_action_create,
+        mock_ic_get,
+    ):
+        """Test deployment with inference_config containing only some fields."""
+        # Setup
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        mock_does_endpoint_exist.return_value = False
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Setup: Mock InferenceComponent.get for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Setup: Mock lineage tracking
+        mock_fetch_package_arn.return_value = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-west-2:123456789012:artifact/test"
+        mock_artifact_get_all.return_value = [mock_artifact]
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # User provides inference_config with only accelerator count and memory
+        inference_config = ResourceRequirements(
+            requests={"num_accelerators": 2, "memory": 8192}  # Required field
+        )
+
+        # Execute
+        builder._deploy_model_customization(
+            endpoint_name="test-endpoint", inference_config=inference_config
+        )
+
+        # Verify: InferenceComponent.create was called with accelerator count
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify accelerator count and memory are set
+        assert compute_reqs.number_of_accelerator_devices_required == 2
+        assert compute_reqs.min_memory_required_in_mb == 8192
+        # CPU cores should be None (not set)
+        assert compute_reqs.number_of_cpu_cores_required is None
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._infer_accelerator_count_from_instance_type")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._is_gpu_instance")
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.Action.create")
+    @patch("sagemaker.core.resources.Artifact.get_all")
+    @patch("sagemaker.core.resources.Association.add")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_inference_config_not_provided_uses_cached_requirements(
+        self,
+        mock_get_resources,
+        mock_fetch_hub,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_endpoint_create,
+        mock_endpoint_config_create,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+        mock_fetch_package_arn,
+        mock_association_add,
+        mock_artifact_get_all,
+        mock_action_create,
+        mock_ic_get,
+        mock_is_gpu,
+        mock_infer_accel,
+    ):
+        """Test deployment without inference_config uses cached compute requirements from build()."""
+        # Setup: Mock GPU detection for g5.12xlarge
+        mock_is_gpu.return_value = True
+        mock_infer_accel.return_value = 4
+
+        # Setup: Mock hub document with default compute requirements
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "ComputeResourceRequirements": {
+                        "NumberOfCpuCoresRequired": 4,
+                        "MinMemoryRequiredInMb": 8192,
+                        "NumberOfAcceleratorDevicesRequired": 4,
+                    },
+                }
+            ]
+        }
+        mock_get_resources.return_value = (48, 196608)  # g5.12xlarge specs
+
+        # Setup: Mock model package
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        mock_does_endpoint_exist.return_value = False
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Setup: Mock InferenceComponent.get for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Setup: Mock lineage tracking
+        mock_fetch_package_arn.return_value = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-west-2:123456789012:artifact/test"
+        mock_artifact_get_all.return_value = [mock_artifact]
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Simulate build() being called which resolves compute requirements
+        cached_requirements = builder._resolve_compute_requirements(
+            instance_type="ml.g5.12xlarge", user_resource_requirements=None
+        )
+        builder._cached_compute_requirements = cached_requirements
+
+        # Execute deployment WITHOUT inference_config
+        builder._deploy_model_customization(endpoint_name="test-endpoint", inference_config=None)
+
+        # Verify: InferenceComponent.create was called with cached requirements
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify cached requirements were used
+        assert compute_reqs.number_of_cpu_cores_required == 4
+        assert compute_reqs.min_memory_required_in_mb == 1024
+        assert compute_reqs.number_of_accelerator_devices_required == 4
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.Action.create")
+    @patch("sagemaker.core.resources.Artifact.get_all")
+    @patch("sagemaker.core.resources.Association.add")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    def test_inference_config_overrides_cached_requirements(
+        self,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_endpoint_create,
+        mock_endpoint_config_create,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+        mock_fetch_package_arn,
+        mock_association_add,
+        mock_artifact_get_all,
+        mock_action_create,
+        mock_ic_get,
+    ):
+        """Test that inference_config takes precedence over cached requirements."""
+        # Setup
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        mock_does_endpoint_exist.return_value = False
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Setup: Mock InferenceComponent.get for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Setup: Mock lineage tracking
+        mock_fetch_package_arn.return_value = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-west-2:123456789012:artifact/test"
+        mock_artifact_get_all.return_value = [mock_artifact]
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Set cached requirements (from build())
+        cached_requirements = InferenceComponentComputeResourceRequirements(
+            number_of_cpu_cores_required=4,
+            min_memory_required_in_mb=8192,
+            number_of_accelerator_devices_required=2,
+        )
+        builder._cached_compute_requirements = cached_requirements
+
+        # User provides different inference_config
+        inference_config = ResourceRequirements(
+            requests={"num_cpus": 16, "memory": 32768, "num_accelerators": 8}
+        )
+
+        # Execute
+        builder._deploy_model_customization(
+            endpoint_name="test-endpoint", inference_config=inference_config
+        )
+
+        # Verify: InferenceComponent.create was called with inference_config values, not cached
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify inference_config values were used (not cached)
+        assert compute_reqs.number_of_cpu_cores_required == 16
+        assert compute_reqs.min_memory_required_in_mb == 32768
+        assert compute_reqs.number_of_accelerator_devices_required == 8
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.Action.create")
+    @patch("sagemaker.core.resources.Artifact.get_all")
+    @patch("sagemaker.core.resources.Association.add")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    def test_all_resource_requirements_fields_reach_api_call(
+        self,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_endpoint_create,
+        mock_endpoint_config_create,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+        mock_fetch_package_arn,
+        mock_association_add,
+        mock_artifact_get_all,
+        mock_action_create,
+        mock_ic_get,
+    ):
+        """Test that all ResourceRequirements fields reach the CreateInferenceComponent API call."""
+        # Setup
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        mock_does_endpoint_exist.return_value = False
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Setup: Mock InferenceComponent.get for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Setup: Mock lineage tracking
+        mock_fetch_package_arn.return_value = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-west-2:123456789012:artifact/test"
+        mock_artifact_get_all.return_value = [mock_artifact]
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # Provide inference_config with all possible fields
+        inference_config = ResourceRequirements(
+            requests={"num_cpus": 12, "memory": 24576, "num_accelerators": 4},
+            limits={"memory": 49152},
+        )
+
+        # Execute
+        builder._deploy_model_customization(
+            endpoint_name="test-endpoint", inference_config=inference_config
+        )
+
+        # Verify: All fields are present in the API call
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify each field individually
+        assert (
+            compute_reqs.number_of_cpu_cores_required == 12
+        ), "number_of_cpu_cores_required should be 12"
+        assert (
+            compute_reqs.min_memory_required_in_mb == 24576
+        ), "min_memory_required_in_mb should be 24576"
+        assert (
+            compute_reqs.max_memory_required_in_mb == 49152
+        ), "max_memory_required_in_mb should be 49152"
+        assert (
+            compute_reqs.number_of_accelerator_devices_required == 4
+        ), "number_of_accelerator_devices_required should be 4"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    @patch("sagemaker.core.resources.Tag.get_all")
+    def test_inference_config_with_existing_endpoint_lora_adapter(
+        self,
+        mock_tag_get_all,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+    ):
+        """Test inference_config with existing endpoint (LORA adapter deployment)."""
+        # Setup: Mock model package
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Endpoint exists
+        mock_does_endpoint_exist.return_value = True
+        mock_fetch_peft.return_value = "LORA"
+
+        mock_endpoint = Mock()
+        mock_endpoint_get.return_value = mock_endpoint
+
+        # Setup: Mock existing base inference component
+        mock_base_component = Mock()
+        mock_base_component.inference_component_name = "base-component"
+        mock_base_component.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/base"
+        )
+        mock_ic_get_all.return_value = [mock_base_component]
+
+        # Setup: Mock tags for base component
+        mock_tag = Mock()
+        mock_tag.key = "Base"
+        mock_tag.value = "test-recipe"
+        mock_tag_get_all.return_value = [mock_tag]
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.g5.12xlarge",
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # User provides inference_config for adapter
+        inference_config = ResourceRequirements(requests={"num_accelerators": 1, "memory": 4096})
+
+        # Execute
+        builder._deploy_model_customization(
+            endpoint_name="existing-endpoint", inference_config=inference_config
+        )
+
+        # Verify: InferenceComponent.create was called with inference_config
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify inference_config values were used
+        assert compute_reqs.number_of_accelerator_devices_required == 1
+        assert compute_reqs.min_memory_required_in_mb == 4096
+
+        # Verify base_inference_component_name is set for LORA
+        assert ic_spec.base_inference_component_name == "base-component"
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.Action.create")
+    @patch("sagemaker.core.resources.Artifact.get_all")
+    @patch("sagemaker.core.resources.Association.add")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package_arn")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_peft")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._does_endpoint_exist")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    def test_inference_config_with_zero_accelerators(
+        self,
+        mock_ic_get_all,
+        mock_ic_create,
+        mock_endpoint_get,
+        mock_endpoint_create,
+        mock_endpoint_config_create,
+        mock_does_endpoint_exist,
+        mock_fetch_package,
+        mock_fetch_peft,
+        mock_fetch_package_arn,
+        mock_association_add,
+        mock_artifact_get_all,
+        mock_action_create,
+        mock_ic_get,
+    ):
+        """Test inference_config with zero accelerators (CPU-only deployment)."""
+        # Setup
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        mock_does_endpoint_exist.return_value = False
+        mock_fetch_peft.return_value = "FULL"
+
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Setup: Mock InferenceComponent.get for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Setup: Mock lineage tracking
+        mock_fetch_package_arn.return_value = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test"
+        )
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-west-2:123456789012:artifact/test"
+        mock_artifact_get_all.return_value = [mock_artifact]
+
+        builder = ModelBuilder(
+            model="huggingface-text-classification",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-text-classification",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.m5.2xlarge",  # CPU instance
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+        )
+
+        # User explicitly sets 0 accelerators for CPU deployment
+        inference_config = ResourceRequirements(
+            requests={"num_cpus": 4, "memory": 8192, "num_accelerators": 0}
+        )
+
+        # Execute
+        builder._deploy_model_customization(
+            endpoint_name="test-endpoint", inference_config=inference_config
+        )
+
+        # Verify: InferenceComponent.create was called with 0 accelerators
+        assert mock_ic_create.called
+        call_kwargs = mock_ic_create.call_args[1]
+        ic_spec = call_kwargs["specification"]
+        compute_reqs = ic_spec.compute_resource_requirements
+
+        # Verify 0 accelerators is accepted
+        assert compute_reqs.number_of_accelerator_devices_required == 0
+        assert compute_reqs.number_of_cpu_cores_required == 4
+        assert compute_reqs.min_memory_required_in_mb == 8192
+
+
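+# Field mapping assumed by the assertions above (illustrative only; the actual
+# translation from ResourceRequirements to
+# InferenceComponentComputeResourceRequirements happens inside ModelBuilder):
+_EXPECTED_RESOURCE_FIELD_MAPPING = {
+    ("requests", "num_cpus"): "number_of_cpu_cores_required",
+    ("requests", "memory"): "min_memory_required_in_mb",
+    ("requests", "num_accelerators"): "number_of_accelerator_devices_required",
+    ("limits", "memory"): "max_memory_required_in_mb",
+}
+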
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sagemaker-serve/tests/unit/test_instance_type_inference.py b/sagemaker-serve/tests/unit/test_instance_type_inference.py
new file mode 100644
index 0000000000..d62ba4c103
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_instance_type_inference.py
@@ -0,0 +1,585 @@
+"""
+Unit tests for ModelBuilder instance type inference edge cases.
+Tests the _infer_instance_type_from_jumpstart method with various scenarios.
+"""
+
+import unittest
+from unittest.mock import Mock, patch
+import pytest
+
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.serve.mode.function_pointers import Mode
+
+
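+# The tests in this module encode the selection order expected from
+# ModelBuilder._infer_instance_type_from_jumpstart. The helper below is an
+# illustrative sketch of that contract (an assumption for readability, not the
+# SDK implementation, and not used by the tests): prefer the "Default"-profile
+# hosting config, then an explicit InstanceType / DefaultInstanceType, then the
+# first entry of SupportedInstanceTypes, and raise a ValueError asking for an
+# explicit instance_type otherwise.
+def _illustrative_infer_instance_type(hub_document: dict) -> str:
+    configs = hub_document.get("HostingConfigs") or []
+    if not configs:
+        raise ValueError("Unable to infer instance type: no hosting configuration")
+    # Prefer the "Default" profile; otherwise fall back to the first config.
+    config = next((c for c in configs if c.get("Profile") == "Default"), configs[0])
+    instance_type = config.get("InstanceType") or config.get("DefaultInstanceType")
+    if instance_type:
+        return instance_type
+    supported = config.get("SupportedInstanceTypes") or []
+    if supported:
+        return supported[0]
+    raise ValueError("Unable to infer instance type: no supported instance types")
+
+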
+class TestInstanceTypeInferenceEdgeCases(unittest.TestCase):
+    """Test instance type inference edge cases - Requirements 1.3, 1.4"""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_missing_jumpstart_metadata_no_hosting_configs(self, mock_fetch_hub):
+        """Test with missing JumpStart metadata - no HostingConfigs."""
+        # Setup: Hub document without HostingConfigs
+        mock_fetch_hub.return_value = {}
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._infer_instance_type_from_jumpstart()
+
+        # Verify error message content
+        error_msg = str(exc_info.value)
+        assert "Unable to infer instance type" in error_msg
+        assert "does not have hosting configuration" in error_msg
+        assert "Please specify instance_type explicitly" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_missing_jumpstart_metadata_empty_hosting_configs(self, mock_fetch_hub):
+        """Test with empty HostingConfigs list."""
+        # Setup: Hub document with empty HostingConfigs
+        mock_fetch_hub.return_value = {"HostingConfigs": []}
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._infer_instance_type_from_jumpstart()
+
+        error_msg = str(exc_info.value)
+        assert "Unable to infer instance type" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_missing_instance_types_in_metadata(self, mock_fetch_hub):
+        """Test with metadata that has no instance type information."""
+        # Setup: Hub document with HostingConfigs but no instance types
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    # Missing both SupportedInstanceTypes and InstanceType/DefaultInstanceType
+                }
+            ]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._infer_instance_type_from_jumpstart()
+
+        error_msg = str(exc_info.value)
+        assert "Unable to infer instance type" in error_msg
+        assert "does not specify supported instance types" in error_msg
+        assert "Please specify instance_type explicitly" in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_first_supported_instance_type_selected(self, mock_fetch_hub):
+        """Test that the first supported instance type is selected."""
+        # Setup: Hub document with both GPU and CPU instance types
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "SupportedInstanceTypes": [
+                        "ml.m5.xlarge",  # CPU instance
+                        "ml.g5.xlarge",  # GPU instance
+                        "ml.g5.2xlarge",  # GPU instance
+                        "ml.p4d.24xlarge",  # GPU instance
+                    ],
+                }
+            ]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        instance_type = builder._infer_instance_type_from_jumpstart()
+
+        # Verify: Should select the first supported instance type
+        assert instance_type == "ml.m5.xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_gpu_required_model_with_various_gpu_families(self, mock_fetch_hub):
+        """Test GPU instance selection across different GPU families."""
+        # Setup: Hub document with various GPU families
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "SupportedInstanceTypes": [
+                        "ml.g4dn.xlarge",  # G4 GPU
+                        "ml.p3.2xlarge",  # P3 GPU
+                        "ml.p5.48xlarge",  # P5 GPU
+                    ],
+                }
+            ]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        instance_type = builder._infer_instance_type_from_jumpstart()
+
+        # Verify: Should select first GPU instance type
+        assert instance_type == "ml.g4dn.xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_default_instance_type_takes_precedence(self, mock_fetch_hub):
+        """Test that DefaultInstanceType/InstanceType takes precedence over supported list."""
+        # Setup: Hub document with both default and supported instance types
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "InstanceType": "ml.g5.12xlarge",
+                    "SupportedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge", "ml.g5.12xlarge"],
+                }
+            ]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        instance_type = builder._infer_instance_type_from_jumpstart()
+
+        # Verify: Should use the default instance type
+        assert instance_type == "ml.g5.12xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_error_message_includes_supported_types(self, mock_fetch_hub):
+        """Test that error messages include available instance types when possible."""
+        # Setup: Hub document that will cause an error but has supported types
+        supported_types = ["ml.g5.xlarge", "ml.g5.2xlarge", "ml.g5.12xlarge"]
+
+        def side_effect_fetch(*args, **kwargs):
+            # First call raises exception
+            if not hasattr(side_effect_fetch, "call_count"):
+                side_effect_fetch.call_count = 0
+            side_effect_fetch.call_count += 1
+
+            if side_effect_fetch.call_count == 1:
+                raise Exception("Test error")
+            else:
+                # Second call in error handling returns valid data
+                return {
+                    "HostingConfigs": [
+                        {"Profile": "Default", "SupportedInstanceTypes": supported_types}
+                    ]
+                }
+
+        mock_fetch_hub.side_effect = side_effect_fetch
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._infer_instance_type_from_jumpstart()
+
+        error_msg = str(exc_info.value)
+        assert "Unable to infer instance type" in error_msg
+        assert "Supported instance types for this model:" in error_msg
+        assert str(supported_types) in error_msg
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_cpu_only_model_selects_cpu_instance(self, mock_fetch_hub):
+        """Test that CPU-only models correctly select CPU instance types."""
+        # Setup: Hub document with only CPU instance types
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "SupportedInstanceTypes": ["ml.m5.xlarge", "ml.m5.2xlarge", "ml.c5.xlarge"],
+                }
+            ]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-text-classification",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-text-classification",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        instance_type = builder._infer_instance_type_from_jumpstart()
+
+        # Verify: Should select first CPU instance type
+        assert instance_type == "ml.m5.xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_non_default_profile_fallback(self, mock_fetch_hub):
+        """Test fallback to first config when Default profile is not present."""
+        # Setup: Hub document without Default profile
+        mock_fetch_hub.return_value = {
+            "HostingConfigs": [{"Profile": "CustomProfile", "InstanceType": "ml.g5.2xlarge"}]
+        }
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute
+        instance_type = builder._infer_instance_type_from_jumpstart()
+
+        # Verify: Should use the first (and only) config
+        assert instance_type == "ml.g5.2xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_fetch_hub_document_exception_handling(self, mock_fetch_hub):
+        """Test proper exception handling when fetching hub document fails."""
+        # Setup: Mock fetch to raise an exception
+        mock_fetch_hub.side_effect = Exception("Network error")
+
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Execute and verify
+        with pytest.raises(ValueError) as exc_info:
+            builder._infer_instance_type_from_jumpstart()
+
+        error_msg = str(exc_info.value)
+        assert "Unable to infer instance type" in error_msg
+        assert "Network error" in error_msg
+        assert "Please specify instance_type explicitly" in error_msg
+
+
+class TestInstanceTypeInferenceIntegration(unittest.TestCase):
+    """Test instance type inference integration with model customization flow - Requirement 1.1"""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_session = Mock()
+        self.mock_session.boto_region_name = "us-west-2"
+        self.mock_session.default_bucket.return_value = "test-bucket"
+        self.mock_session.default_bucket_prefix = "test-prefix"
+        self.mock_session.config = {}
+        self.mock_session.sagemaker_config = {}
+        self.mock_session.settings = Mock()
+        self.mock_session.settings.include_jumpstart_tags = False
+
+        mock_credentials = Mock()
+        mock_credentials.access_key = "test-key"
+        mock_credentials.secret_key = "test-secret"
+        mock_credentials.token = None
+        self.mock_session.boto_session = Mock()
+        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
+        self.mock_session.boto_session.region_name = "us-west-2"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_instance_type_inference_called_when_none_in_model_customization(
+        self, mock_get_resources, mock_fetch_hub, mock_fetch_package
+    ):
+        """Test that _infer_instance_type_from_jumpstart is called when instance_type is None in model customization."""
+        # Setup: Mock model package
+        mock_package = Mock()
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document with recipe but no instance type in recipe config
+        mock_fetch_hub.return_value = {
+            "RecipeCollection": [
+                {
+                    "Name": "test-recipe",
+                    "HostingConfigs": [
+                        {
+                            "Profile": "Default",
+                            "EcrAddress": "123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+                            # No InstanceType or DefaultInstanceType in recipe config
+                            "ComputeResourceRequirements": {"NumberOfCpuCoresRequired": 4},
+                        }
+                    ],
+                }
+            ],
+            # Add HostingConfigs at root level for _infer_instance_type_from_jumpstart
+            "HostingConfigs": [
+                {
+                    "Profile": "Default",
+                    "InstanceType": "ml.g5.12xlarge",
+                    "SupportedInstanceTypes": ["ml.g5.xlarge", "ml.g5.12xlarge"],
+                }
+            ],
+        }
+
+        mock_get_resources.return_value = (8, 32768)  # 8 CPUs, 32GB RAM
+
+        # Create ModelBuilder without instance_type
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        # Manually set instance_type to None to simulate the scenario
+        builder.instance_type = None
+
+        # Execute: Call _fetch_and_cache_recipe_config which should trigger inference
+        builder._fetch_and_cache_recipe_config()
+
+        # Verify: instance_type should be inferred from JumpStart metadata
+        assert builder.instance_type is not None
+        assert builder.instance_type == "ml.g5.12xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_instance_type_not_inferred_when_provided_in_recipe(
+        self, mock_get_resources, mock_fetch_hub, mock_fetch_package
+    ):
+        """Test that _infer_instance_type_from_jumpstart is NOT called when instance_type is in recipe config."""
+        # Setup: Mock model package
+        mock_package = Mock()
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document with recipe that HAS instance type
+        mock_fetch_hub.return_value = {
+            "RecipeCollection": [
+                {
+                    "Name": "test-recipe",
+                    "HostingConfigs": [
+                        {
+                            "Profile": "Default",
+                            "EcrAddress": "123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+                            "InstanceType": "ml.g5.2xlarge",  # Instance type provided in recipe
+                            "ComputeResourceRequirements": {"NumberOfCpuCoresRequired": 4},
+                        }
+                    ],
+                }
+            ]
+        }
+
+        mock_get_resources.return_value = (8, 32768)
+
+        # Create ModelBuilder without instance_type
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",
+        )
+
+        builder.instance_type = None
+
+        # Execute
+        builder._fetch_and_cache_recipe_config()
+
+        # Verify: Should use instance type from recipe config, not inference
+        assert builder.instance_type == "ml.g5.2xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._get_instance_resources")
+    def test_instance_type_preserved_when_explicitly_provided(
+        self, mock_get_resources, mock_fetch_hub, mock_fetch_package
+    ):
+        """Test backward compatibility: explicitly provided instance_type is preserved."""
+        # Setup: Mock model package
+        mock_package = Mock()
+        mock_package.inference_specification.containers = [Mock()]
+        mock_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_package.inference_specification.containers[
+            0
+        ].model_data_source.s3_data_source.s3_uri = "s3://test-bucket/model"
+        mock_fetch_package.return_value = mock_package
+
+        # Setup: Hub document
+        mock_fetch_hub.return_value = {
+            "RecipeCollection": [
+                {
+                    "Name": "test-recipe",
+                    "HostingConfigs": [
+                        {
+                            "Profile": "Default",
+                            "EcrAddress": "123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
+                            "InstanceType": "ml.g5.12xlarge",  # Different from user-provided
+                            "ComputeResourceRequirements": {"NumberOfCpuCoresRequired": 4},
+                        }
+                    ],
+                }
+            ]
+        }
+
+        mock_get_resources.return_value = (8, 32768)
+
+        # Create ModelBuilder WITH explicit instance_type
+        builder = ModelBuilder(
+            model="huggingface-llm-mistral-7b",
+            model_metadata={
+                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
+                "CUSTOM_MODEL_VERSION": "1.0.0",
+            },
+            instance_type="ml.p4d.24xlarge",  # User explicitly provides instance type
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+        )
+
+        # Execute
+        builder._fetch_and_cache_recipe_config()
+
+        # Verify: Should preserve user-provided instance type
+        assert builder.instance_type == "ml.p4d.24xlarge"
+
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_model_package")
+    @patch("sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model")
+    def test_inference_only_called_for_model_customization(
+        self, mock_fetch_hub, mock_fetch_package
+    ):
+        """Test that inference is only called during model customization flow, not for regular models."""
+        # This test verifies that _fetch_and_cache_recipe_config is only called
+        # in the model customization flow, which is the only place where
+        # _infer_instance_type_from_jumpstart should be called
+
+        # For regular (non-model-customization) models, _fetch_and_cache_recipe_config
+        # is never called, so _infer_instance_type_from_jumpstart won't be called either
+
+        # Create a regular ModelBuilder (not model customization)
+        builder = ModelBuilder(
+            model="my-local-model",  # Not a model customization
+            mode=Mode.SAGEMAKER_ENDPOINT,
+            role_arn="arn:aws:iam::123456789012:role/TestRole",
+            sagemaker_session=self.mock_session,
+            instance_type="ml.m5.xlarge",  # Provide instance_type to avoid auto-detection
+        )
+
+        # Verify: _is_model_customization should return False
+        assert not builder._is_model_customization()
+
+        # For model customization, _fetch_and_cache_recipe_config is called
+        # which is where instance type inference happens
+        # This is tested in the previous tests
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sagemaker-serve/tests/unit/test_model_builder.py b/sagemaker-serve/tests/unit/test_model_builder.py
index 854556438e..19f7cae31f 100644
--- a/sagemaker-serve/tests/unit/test_model_builder.py
+++ b/sagemaker-serve/tests/unit/test_model_builder.py
@@ -515,3 +515,203 @@ def test_deploy_model_customization_new_endpoint(self):
                                                 )
         
         self.assertEqual(result, mock_endpoint)
+
+    def test_deploy_model_customization_with_inference_config(self):
+        """Test _deploy_model_customization with inference_config parameter."""
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+        from sagemaker.core.resources import Endpoint, EndpointConfig, InferenceComponent, Action, Association, Artifact
+        from sagemaker.core.inference_config import ResourceRequirements
+        
+        # Setup mocks
+        mock_endpoint_config = Mock()
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = "arn:aws:sagemaker:us-east-1:123456789012:inference-component/test-ic"
+        mock_action = Mock()
+        mock_action.action_arn = "arn:aws:sagemaker:us-east-1:123456789012:action/test-action"
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-east-1:123456789012:artifact/test-artifact"
+        
+        mock_model_package = Mock()
+        mock_model_package.inference_specification.containers = [Mock()]
+        mock_model_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_model_package.inference_specification.containers[0].model_data_source.s3_data_source.s3_uri = "s3://bucket/model"
+        
+        builder = ModelBuilder(
+            model=self.mock_training_job,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            sagemaker_session=self.mock_session,
+            image_uri="test-image:latest",
+            instance_type="ml.g5.12xlarge"
+        )
+        
+        # Set cached compute requirements (should be overridden by inference_config)
+        builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=1024,
+            number_of_cpu_cores_required=1,
+            number_of_accelerator_devices_required=1
+        )
+        
+        # Create inference_config with different values
+        inference_config = ResourceRequirements(
+            requests={
+                "num_accelerators": 4,
+                "num_cpus": 8,
+                "memory": 49152
+            },
+            limits={
+                "memory": 98304
+            }
+        )
+        
+        # Track the InferenceComponent.create call to verify compute requirements
+        created_ic_spec = None
+        def capture_ic_create(**kwargs):
+            nonlocal created_ic_spec
+            created_ic_spec = kwargs.get('specification')
+            return mock_ic
+        
+        with patch.object(builder, '_fetch_model_package', return_value=mock_model_package):
+            with patch.object(builder, '_fetch_peft', return_value=None):
+                with patch.object(EndpointConfig, 'create', return_value=mock_endpoint_config):
+                    with patch.object(Endpoint, 'get', side_effect=ClientError({'Error': {'Code': 'ValidationException'}}, 'GetEndpoint')):
+                        with patch.object(Endpoint, 'create', return_value=mock_endpoint):
+                            with patch.object(InferenceComponent, 'create', side_effect=capture_ic_create):
+                                with patch.object(InferenceComponent, 'get', return_value=mock_ic):
+                                    with patch.object(Action, 'create', return_value=mock_action):
+                                        with patch.object(Artifact, 'get_all', return_value=[mock_artifact]):
+                                            with patch.object(Association, 'add', return_value=None):
+                                                result = builder._deploy_model_customization(
+                                                    endpoint_name="test-endpoint",
+                                                    instance_type="ml.g5.12xlarge",
+                                                    initial_instance_count=1,
+                                                    inference_config=inference_config
+                                                )
+        
+        # Verify the result
+        self.assertEqual(result, mock_endpoint)
+        
+        # Verify that inference_config values were used (not cached values)
+        self.assertIsNotNone(created_ic_spec)
+        compute_reqs = created_ic_spec.compute_resource_requirements
+        self.assertEqual(compute_reqs.min_memory_required_in_mb, 49152)
+        self.assertEqual(compute_reqs.max_memory_required_in_mb, 98304)
+        self.assertEqual(compute_reqs.number_of_cpu_cores_required, 8)
+        self.assertEqual(compute_reqs.number_of_accelerator_devices_required, 4)
+
+    def test_deploy_model_customization_without_inference_config_uses_cached(self):
+        """Test _deploy_model_customization falls back to cached requirements when inference_config not provided."""
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+        from sagemaker.core.resources import Endpoint, EndpointConfig, InferenceComponent, Action, Association, Artifact
+        
+        # Setup mocks
+        mock_endpoint_config = Mock()
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = "arn:aws:sagemaker:us-east-1:123456789012:inference-component/test-ic"
+        mock_action = Mock()
+        mock_action.action_arn = "arn:aws:sagemaker:us-east-1:123456789012:action/test-action"
+        mock_artifact = Mock()
+        mock_artifact.artifact_arn = "arn:aws:sagemaker:us-east-1:123456789012:artifact/test-artifact"
+        
+        mock_model_package = Mock()
+        mock_model_package.inference_specification.containers = [Mock()]
+        mock_model_package.inference_specification.containers[0].base_model.recipe_name = "test-recipe"
+        mock_model_package.inference_specification.containers[0].model_data_source.s3_data_source.s3_uri = "s3://bucket/model"
+        
+        builder = ModelBuilder(
+            model=self.mock_training_job,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            sagemaker_session=self.mock_session,
+            image_uri="test-image:latest",
+            instance_type="ml.g5.2xlarge"
+        )
+        
+        # Set cached compute requirements
+        cached_reqs = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=2048,
+            number_of_cpu_cores_required=2,
+            number_of_accelerator_devices_required=1
+        )
+        builder._cached_compute_requirements = cached_reqs
+        
+        # Track the InferenceComponent.create call to verify compute requirements
+        created_ic_spec = None
+        def capture_ic_create(**kwargs):
+            nonlocal created_ic_spec
+            created_ic_spec = kwargs.get('specification')
+            return mock_ic
+        
+        with patch.object(builder, '_fetch_model_package', return_value=mock_model_package):
+            with patch.object(builder, '_fetch_peft', return_value=None):
+                with patch.object(EndpointConfig, 'create', return_value=mock_endpoint_config):
+                    with patch.object(Endpoint, 'get', side_effect=ClientError({'Error': {'Code': 'ValidationException'}}, 'GetEndpoint')):
+                        with patch.object(Endpoint, 'create', return_value=mock_endpoint):
+                            with patch.object(InferenceComponent, 'create', side_effect=capture_ic_create):
+                                with patch.object(InferenceComponent, 'get', return_value=mock_ic):
+                                    with patch.object(Action, 'create', return_value=mock_action):
+                                        with patch.object(Artifact, 'get_all', return_value=[mock_artifact]):
+                                            with patch.object(Association, 'add', return_value=None):
+                                                result = builder._deploy_model_customization(
+                                                    endpoint_name="test-endpoint",
+                                                    instance_type="ml.g5.2xlarge",
+                                                    initial_instance_count=1
+                                                    # Note: no inference_config parameter
+                                                )
+        
+        # Verify the result
+        self.assertEqual(result, mock_endpoint)
+        
+        # Verify that cached requirements were used
+        self.assertIsNotNone(created_ic_spec)
+        compute_reqs = created_ic_spec.compute_resource_requirements
+        self.assertIs(compute_reqs, cached_reqs)
+
+    def test_deploy_passes_inference_config_to_model_customization(self):
+        """Test that deploy() passes inference_config to _deploy_model_customization for model customization deployments."""
+        from sagemaker.core.inference_config import ResourceRequirements
+        
+        # Create a mock training job that will be recognized as model customization
+        mock_training_job = Mock()
+        mock_training_job.training_job_name = "test-training-job"
+        
+        builder = ModelBuilder(
+            model=mock_training_job,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            sagemaker_session=self.mock_session,
+            image_uri="test-image:latest",
+            instance_type="ml.g5.12xlarge"
+        )
+        
+        # Mark as built
+        builder.built_model = Mock()
+        
+        # Create inference_config
+        inference_config = ResourceRequirements(
+            requests={
+                "num_accelerators": 4,
+                "num_cpus": 8,
+                "memory": 49152
+            }
+        )
+        
+        # Mock _is_model_customization to return True
+        with patch.object(builder, '_is_model_customization', return_value=True):
+            # Mock _deploy_model_customization to capture the call
+            with patch.object(builder, '_deploy_model_customization') as mock_deploy_mc:
+                mock_endpoint = Mock()
+                mock_deploy_mc.return_value = mock_endpoint
+                
+                # Call deploy with inference_config
+                result = builder.deploy(
+                    endpoint_name="test-endpoint",
+                    inference_config=inference_config
+                )
+                
+                # Verify _deploy_model_customization was called with inference_config
+                mock_deploy_mc.assert_called_once()
+                call_kwargs = mock_deploy_mc.call_args[1]
+                self.assertEqual(call_kwargs['inference_config'], inference_config)
+                self.assertEqual(result, mock_endpoint)
diff --git a/sagemaker-serve/tests/unit/test_two_stage_deployment.py b/sagemaker-serve/tests/unit/test_two_stage_deployment.py
new file mode 100644
index 0000000000..827d3bea41
--- /dev/null
+++ b/sagemaker-serve/tests/unit/test_two_stage_deployment.py
@@ -0,0 +1,594 @@
+"""Unit tests for two-stage deployment support (base model + adapter).
+
+Tests verify that:
+1. Base models are correctly tagged as "Base"
+2. Full fine-tuned models are NOT tagged as "Base"
+3. LORA adapters correctly reference base components
+4. Separate inference components are created for base and adapter
+"""
+
+import pytest
+from unittest.mock import Mock, patch, MagicMock, call
+from sagemaker.serve.model_builder import ModelBuilder
+from sagemaker.core.resources import ModelPackage, TrainingJob
+
+
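+# Illustrative helper (an assumption for readability, not the SDK implementation
+# and not referenced by the tests): the tagging rule these tests assert against.
+# A base-model container (model_data_source is None) is expected to produce a
+# single {"key": "Base", "value": <recipe_name>} tag on its inference component,
+# while a full fine-tuned container (model_data_source present) produces none.
+def _expected_inference_component_tags(container) -> list:
+    if container.model_data_source is None:
+        return [{"key": "Base", "value": container.base_model.recipe_name}]
+    return []
+
+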
+class TestTwoStageDeployment:
+    """Test two-stage deployment pattern for base models and adapters."""
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch.object(ModelBuilder, "_fetch_model_package_arn")
+    @patch.object(ModelBuilder, "_fetch_model_package")
+    @patch.object(ModelBuilder, "_fetch_peft")
+    @patch.object(ModelBuilder, "_does_endpoint_exist")
+    @patch.object(ModelBuilder, "_fetch_hub_document_for_custom_model")
+    @patch.object(ModelBuilder, "_is_model_customization")
+    def test_base_model_deployment_tagged_correctly(
+        self,
+        mock_is_customization,
+        mock_fetch_hub,
+        mock_endpoint_exists,
+        mock_fetch_peft,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_ic_create,
+        mock_ic_get,
+    ):
+        """Test that base model deployments are correctly tagged as 'Base'."""
+        # Setup: Base model (no model_data_source, has base_model)
+        mock_is_customization.return_value = True
+        mock_endpoint_exists.return_value = False
+        mock_fetch_peft.return_value = None  # Not a LORA adapter
+
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-base-model"
+        mock_container.model_data_source = None  # No model_data_source = base model
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        mock_fetch_hub.return_value = {
+            "HostingArtifactUri": "s3://jumpstart-bucket/base-model-artifacts/"
+        }
+
+        # Mock endpoint creation
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Mock inference component for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test-ic"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Create ModelBuilder and deploy
+        model_builder = ModelBuilder(
+            model=mock_package,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            instance_type="ml.g5.12xlarge",
+        )
+
+        # Mock the cached compute requirements
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        model_builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=24576, number_of_accelerator_devices_required=4
+        )
+
+        # Deploy with mocked lineage tracking
+        with patch("sagemaker.core.resources.Action"), patch(
+            "sagemaker.core.resources.Association"
+        ), patch("sagemaker.core.resources.Artifact"):
+            model_builder._deploy_model_customization(endpoint_name="test-endpoint")
+
+        # Verify: InferenceComponent.create was called with Base tag
+        assert mock_ic_create.called
+        create_call = mock_ic_create.call_args
+        tags = create_call[1].get("tags", [])
+
+        # Should have exactly one tag with key="Base"
+        assert len(tags) == 1
+        assert tags[0]["key"] == "Base"
+        assert tags[0]["value"] == "test-base-model"
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch.object(ModelBuilder, "_fetch_model_package_arn")
+    @patch.object(ModelBuilder, "_fetch_model_package")
+    @patch.object(ModelBuilder, "_fetch_peft")
+    @patch.object(ModelBuilder, "_does_endpoint_exist")
+    @patch.object(ModelBuilder, "_is_model_customization")
+    def test_full_fine_tuned_model_not_tagged_as_base(
+        self,
+        mock_is_customization,
+        mock_endpoint_exists,
+        mock_fetch_peft,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_ic_create,
+        mock_ic_get,
+    ):
+        """Test that full fine-tuned models are NOT tagged as 'Base'."""
+        # Setup: Full fine-tuned model (has model_data_source)
+        mock_is_customization.return_value = True
+        mock_endpoint_exists.return_value = False
+        mock_fetch_peft.return_value = None  # Not a LORA adapter
+
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-base-model"
+
+        # Has model_data_source = full fine-tuned model
+        mock_container.model_data_source = Mock()
+        mock_container.model_data_source.s3_data_source = Mock()
+        mock_container.model_data_source.s3_data_source.s3_uri = "s3://bucket/fine-tuned-model/"
+
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        # Mock endpoint creation
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Mock inference component for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test-ic"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Create ModelBuilder and deploy
+        model_builder = ModelBuilder(
+            model=mock_package,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            instance_type="ml.g5.12xlarge",
+        )
+
+        # Mock the cached compute requirements
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        model_builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=24576, number_of_accelerator_devices_required=4
+        )
+
+        # Deploy with mocked lineage tracking
+        with patch("sagemaker.core.resources.Action"), patch(
+            "sagemaker.core.resources.Association"
+        ), patch("sagemaker.core.resources.Artifact"):
+            model_builder._deploy_model_customization(endpoint_name="test-endpoint")
+
+        # Verify: InferenceComponent.create was called WITHOUT Base tag
+        assert mock_ic_create.called
+        create_call = mock_ic_create.call_args
+        tags = create_call[1].get("tags", [])
+
+        # Should have NO tags (empty list)
+        assert len(tags) == 0
+
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Tag.get_all")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch.object(ModelBuilder, "_fetch_model_package")
+    @patch.object(ModelBuilder, "_fetch_peft")
+    @patch.object(ModelBuilder, "_does_endpoint_exist")
+    @patch.object(ModelBuilder, "_is_model_customization")
+    def test_lora_adapter_references_base_component(
+        self,
+        mock_is_customization,
+        mock_endpoint_exists,
+        mock_fetch_peft,
+        mock_fetch_package,
+        mock_endpoint_get,
+        mock_tag_get_all,
+        mock_ic_create,
+        mock_ic_get_all,
+    ):
+        """Test that LORA adapters correctly reference the base component."""
+        # Setup: LORA adapter deployment on existing endpoint
+        mock_is_customization.return_value = True
+        mock_endpoint_exists.return_value = True
+        mock_fetch_peft.return_value = "LORA"  # This is a LORA adapter
+
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-base-model"
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+
+        # Mock existing endpoint
+        mock_endpoint = Mock()
+        mock_endpoint_get.return_value = mock_endpoint
+
+        # Mock existing base inference component with Base tag
+        mock_base_ic = Mock()
+        mock_base_ic.inference_component_name = "test-endpoint-base-component"
+        mock_base_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/base"
+        )
+
+        mock_ic_get_all.return_value = [mock_base_ic]
+
+        # Mock Tag.get_all to return Base tag
+        mock_tag = Mock()
+        mock_tag.key = "Base"
+        mock_tag.value = "test-base-model"
+        mock_tag_get_all.return_value = [mock_tag]
+
+        # Create ModelBuilder and deploy
+        model_builder = ModelBuilder(
+            model=mock_package,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            instance_type="ml.g5.12xlarge",
+        )
+
+        # Mock the cached compute requirements
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        model_builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=24576, number_of_accelerator_devices_required=4
+        )
+
+        # Deploy
+        model_builder._deploy_model_customization(endpoint_name="test-endpoint")
+
+        # Verify: InferenceComponent.create was called with base_inference_component_name
+        assert mock_ic_create.called
+        create_call = mock_ic_create.call_args
+        spec = create_call[1].get("specification")
+
+        # Should reference the base component
+        assert spec.base_inference_component_name == "test-endpoint-base-component"
+
+        # Should have artifact_url = None for LORA
+        assert spec.container.artifact_url is None
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch.object(ModelBuilder, "_fetch_model_package_arn")
+    @patch.object(ModelBuilder, "_fetch_model_package")
+    @patch.object(ModelBuilder, "_fetch_peft")
+    @patch.object(ModelBuilder, "_does_endpoint_exist")
+    @patch.object(ModelBuilder, "_fetch_hub_document_for_custom_model")
+    @patch.object(ModelBuilder, "_is_model_customization")
+    def test_base_model_uses_hosting_artifact_uri(
+        self,
+        mock_is_customization,
+        mock_fetch_hub,
+        mock_endpoint_exists,
+        mock_fetch_peft,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_ic_create,
+        mock_ic_get,
+    ):
+        """Test that base model deployment uses HostingArtifactUri from JumpStart."""
+        # Setup: Base model
+        mock_is_customization.return_value = True
+        mock_endpoint_exists.return_value = False
+        mock_fetch_peft.return_value = None
+
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-base-model"
+        mock_container.model_data_source = None  # Base model
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        expected_artifact_uri = "s3://jumpstart-bucket/base-model-artifacts/"
+        mock_fetch_hub.return_value = {"HostingArtifactUri": expected_artifact_uri}
+
+        # Mock endpoint creation
+        mock_endpoint = Mock()
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+
+        # Mock inference component for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test-ic"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # Create ModelBuilder and deploy
+        model_builder = ModelBuilder(
+            model=mock_package,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            instance_type="ml.g5.12xlarge",
+        )
+
+        # Mock the cached compute requirements
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        model_builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=24576, number_of_accelerator_devices_required=4
+        )
+
+        # Deploy with mocked lineage tracking
+        with patch("sagemaker.core.resources.Action"), patch(
+            "sagemaker.core.resources.Association"
+        ), patch("sagemaker.core.resources.Artifact"):
+            model_builder._deploy_model_customization(endpoint_name="test-endpoint")
+
+        # Verify: InferenceComponent.create was called with HostingArtifactUri
+        assert mock_ic_create.called
+        create_call = mock_ic_create.call_args
+        spec = create_call[1].get("specification")
+
+        # Should use HostingArtifactUri
+        assert spec.container.artifact_url == expected_artifact_uri
+
+    @patch("sagemaker.core.resources.InferenceComponent.get")
+    @patch("sagemaker.core.resources.InferenceComponent.get_all")
+    @patch("sagemaker.core.resources.InferenceComponent.create")
+    @patch("sagemaker.core.resources.Tag.get_all")
+    @patch("sagemaker.core.resources.Endpoint.get")
+    @patch("sagemaker.core.resources.Endpoint.create")
+    @patch("sagemaker.core.resources.EndpointConfig.create")
+    @patch.object(ModelBuilder, "_fetch_model_package_arn")
+    @patch.object(ModelBuilder, "_fetch_model_package")
+    @patch.object(ModelBuilder, "_fetch_peft")
+    @patch.object(ModelBuilder, "_does_endpoint_exist")
+    @patch.object(ModelBuilder, "_fetch_hub_document_for_custom_model")
+    @patch.object(ModelBuilder, "_is_model_customization")
+    def test_sequential_base_then_adapter_deployment(
+        self,
+        mock_is_customization,
+        mock_fetch_hub,
+        mock_endpoint_exists,
+        mock_fetch_peft,
+        mock_fetch_package,
+        mock_fetch_package_arn,
+        mock_endpoint_config_create,
+        mock_endpoint_create,
+        mock_endpoint_get,
+        mock_tag_get_all,
+        mock_ic_create,
+        mock_ic_get_all,
+        mock_ic_get,
+    ):
+        """Test deploying base model first, then adapter as separate operation.
+
+        Validates Requirements 5.3: Sequential Base-Then-Adapter Deployment
+        """
+        # Setup: Base model deployment first
+        mock_is_customization.return_value = True
+        mock_fetch_peft.return_value = None  # Not a LORA adapter initially
+
+        mock_package = Mock()
+        mock_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/test-package"
+        )
+        mock_container = Mock()
+        mock_container.base_model = Mock()
+        mock_container.base_model.recipe_name = "test-base-model"
+        mock_container.model_data_source = None  # Base model
+        mock_package.inference_specification = Mock()
+        mock_package.inference_specification.containers = [mock_container]
+        mock_fetch_package.return_value = mock_package
+        mock_fetch_package_arn.return_value = mock_package.model_package_arn
+
+        mock_fetch_hub.return_value = {
+            "HostingArtifactUri": "s3://jumpstart-bucket/base-model-artifacts/"
+        }
+
+        # Mock endpoint creation for base model
+        mock_endpoint = Mock()
+        mock_endpoint.endpoint_name = "test-endpoint"
+        mock_endpoint.wait_for_status = Mock()
+        mock_endpoint_create.return_value = mock_endpoint
+        mock_endpoint_get.return_value = mock_endpoint
+
+        # Mock inference component for lineage tracking
+        mock_ic = Mock()
+        mock_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/test-ic"
+        )
+        mock_ic_get.return_value = mock_ic
+
+        # First deployment: Base model
+        mock_endpoint_exists.return_value = False
+
+        model_builder = ModelBuilder(
+            model=mock_package,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            instance_type="ml.g5.12xlarge",
+        )
+
+        from sagemaker.core.shapes import InferenceComponentComputeResourceRequirements
+
+        model_builder._cached_compute_requirements = InferenceComponentComputeResourceRequirements(
+            min_memory_required_in_mb=24576, number_of_accelerator_devices_required=4
+        )
+
+        # Deploy base model with mocked lineage tracking
+        with patch("sagemaker.core.resources.Action"), patch(
+            "sagemaker.core.resources.Association"
+        ), patch("sagemaker.core.resources.Artifact"):
+            model_builder._deploy_model_customization(endpoint_name="test-endpoint")
+
+        # Verify base model was deployed
+        assert mock_endpoint_create.called
+        assert mock_ic_create.call_count == 1
+        base_create_call = mock_ic_create.call_args
+        base_tags = base_create_call[1].get("tags", [])
+        assert len(base_tags) == 1
+        assert base_tags[0]["key"] == "Base"
+
+        # Reset mocks for adapter deployment
+        mock_ic_create.reset_mock()
+
+        # Second deployment: Adapter on existing endpoint
+        mock_endpoint_exists.return_value = True
+        mock_fetch_peft.return_value = "LORA"  # Now deploying LORA adapter
+
+        # Mock adapter package
+        mock_adapter_package = Mock()
+        mock_adapter_package.model_package_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:model-package/adapter-package"
+        )
+        mock_adapter_container = Mock()
+        mock_adapter_container.base_model = Mock()
+        mock_adapter_container.base_model.recipe_name = "test-base-model"
+        mock_adapter_package.inference_specification = Mock()
+        mock_adapter_package.inference_specification.containers = [mock_adapter_container]
+        mock_fetch_package.return_value = mock_adapter_package
+
+        # Mock base inference component
+        mock_base_ic = Mock()
+        mock_base_ic.inference_component_name = "test-endpoint-inference-component"
+        mock_base_ic.inference_component_arn = (
+            "arn:aws:sagemaker:us-west-2:123456789012:inference-component/base"
+        )
+
+        # Mock Tag.get_all to return Base tag from base component
+        mock_tag = Mock()
+        mock_tag.key = "Base"
+        mock_tag.value = "test-base-model"
+        mock_tag_get_all.return_value = [mock_tag]
+
+        # Mock get_all to return base component
+        mock_ic_get_all.return_value = [mock_base_ic]
+
+        # Create new ModelBuilder for adapter
+        adapter_builder = ModelBuilder(
+            model=mock_adapter_package,
+            role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
+            instance_type="ml.g5.12xlarge",
+        )
+        adapter_builder._cached_compute_requirements = (
+            InferenceComponentComputeResourceRequirements(
+                min_memory_required_in_mb=24576, number_of_accelerator_devices_required=4
+            )
+        )
+
+        # Deploy adapter (no lineage tracking for existing endpoint)
+        adapter_builder._deploy_model_customization(endpoint_name="test-endpoint")
+
+        # Verify adapter was deployed
+        assert mock_ic_create.call_count == 1
+        adapter_create_call = mock_ic_create.call_args
+
+        # Verify adapter references base component
+        spec = adapter_create_call[1].get("specification")
+        assert spec.base_inference_component_name == "test-endpoint-inference-component"
+
+        # Verify adapter has no tags (only base model is tagged)
+        adapter_tags = adapter_create_call[1].get("tags", [])
+        assert len(adapter_tags) == 0
+
+        # Verify endpoint was not recreated
+        assert mock_endpoint_create.call_count == 1  # Only called once for base model
+
+    def test_routing_with_both_base_and_adapter_components(self):
+        """Test that inference requests can be routed to specific components.
+
+        Validates Requirements 5.4: Multi-Component Routing
+        """
+        # Setup: Mock endpoint with both base and adapter components
+        mock_endpoint = Mock()
+        mock_endpoint.endpoint_name = "test-endpoint"
+
+        # Mock base inference component
+        mock_base_ic = Mock()
+        mock_base_ic.inference_component_name = "test-endpoint-base-component"
+
+        # Mock adapter inference component
+        mock_adapter_ic = Mock()
+        mock_adapter_ic.inference_component_name = "test-endpoint-adapter-component"
+
+        # Mock invoke responses
+        mock_base_response = Mock()
+        mock_base_response.body = b'{"result": "base model response"}'
+        mock_base_response.content_type = "application/json"
+
+        mock_adapter_response = Mock()
+        mock_adapter_response.body = b'{"result": "adapter response"}'
+        mock_adapter_response.content_type = "application/json"
+
+        # Test 1: Invoke base component
+        mock_endpoint.invoke = Mock(return_value=mock_base_response)
+
+        response = mock_endpoint.invoke(
+            body={"input": "test"}, inference_component_name="test-endpoint-base-component"
+        )
+
+        # Verify base component was invoked
+        assert mock_endpoint.invoke.called
+        call_args = mock_endpoint.invoke.call_args
+        assert call_args.kwargs.get("inference_component_name") == "test-endpoint-base-component"
+        assert response.body == b'{"result": "base model response"}'
+
+        # Reset mock
+        mock_endpoint.invoke.reset_mock()
+
+        # Test 2: Invoke adapter component
+        mock_endpoint.invoke = Mock(return_value=mock_adapter_response)
+
+        response = mock_endpoint.invoke(
+            body={"input": "test"}, inference_component_name="test-endpoint-adapter-component"
+        )
+
+        # Verify adapter component was invoked
+        assert mock_endpoint.invoke.called
+        call_args = mock_endpoint.invoke.call_args
+        assert call_args.kwargs.get("inference_component_name") == "test-endpoint-adapter-component"
+        assert response.body == b'{"result": "adapter response"}'
+
+        # Test 3: Invoke without specifying component (default routing)
+        mock_endpoint.invoke.reset_mock()
+        mock_endpoint.invoke = Mock(return_value=mock_base_response)
+
+        response = mock_endpoint.invoke(body={"input": "test"})
+
+        # Verify invoke was called without inference_component_name
+        assert mock_endpoint.invoke.called
+        call_args = mock_endpoint.invoke.call_args
+        # When no component is specified, the parameter should be absent or None
+        inference_component = call_args.kwargs.get("inference_component_name")
+        assert inference_component is None or "inference_component_name" not in call_args.kwargs
diff --git a/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py b/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py
index a50c21ffe3..79eb565fa6 100644
--- a/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py
+++ b/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py
@@ -1028,10 +1028,6 @@
                 {% if kms_key_id %},
                 "KmsKeyId": "{{ kms_key_id }}"
                 {% endif %}
-                },
-                "ModelPackageConfig": {
-                    "ModelPackageGroupArn": "{{ model_package_group_arn }}",
-                    "SourceModelPackageArn": "{{ source_model_package_arn }}"
                 }{% if dataset_uri %},
                 "InputDataConfig": [
                     {
diff --git a/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py b/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py
new file mode 100644
index 0000000000..f2d28e1ac2
--- /dev/null
+++ b/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py
@@ -0,0 +1,290 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Integration test for LLM Judge Base Model Fix
+
+This test verifies that when evaluate_base_model=True, the base model evaluation
+uses the original base model from the public hub (without fine-tuned weights),
+while the custom model evaluation correctly loads fine-tuned weights.
+"""
+from __future__ import absolute_import
+
+import json
+import pytest
+import logging
+
+from sagemaker.train.evaluate import (
+    LLMAsJudgeEvaluator,
+    EvaluationPipelineExecution,
+)
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(levelname)s - %(name)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+# Test timeout configuration (in seconds)
+EVALUATION_TIMEOUT_SECONDS = 14400  # 4 hours
+
+# Custom metrics definition
+CUSTOM_METRIC_DICT = {
+    "customMetricDefinition": {
+        "name": "PositiveSentiment",
+        "instructions": (
+            "You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. "
+            "Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\n\n"
+            "Consider the following:\n"
+            "- Does the response have a positive, encouraging tone?\n"
+            "- Is the response helpful and constructive?\n"
+            "- Does it avoid negative language or criticism?\n\n"
+            "Rate on this scale:\n"
+            "- Good: Response has positive sentiment\n"
+            "- Poor: Response lacks positive sentiment\n\n"
+            "Here is the actual task:\n"
+            "Prompt: {{prompt}}\n"
+            "Response: {{prediction}}"
+        ),
+        "ratingScale": [
+            {"definition": "Good", "value": {"floatValue": 1}},
+            {"definition": "Poor", "value": {"floatValue": 0}}
+        ]
+    }
+}
+
+# Test configuration
+TEST_CONFIG = {
+    "model_package_arn": "arn:aws:sagemaker:us-west-2:729646638167:model-package/sdk-test-finetuned-models/1",
+    "evaluator_model": "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "dataset_s3_uri": "s3://sagemaker-us-west-2-729646638167/model-customization/eval/gen_qa.jsonl",
+    "builtin_metrics": ["Completeness", "Faithfulness"],
+    "custom_metrics_json": json.dumps([CUSTOM_METRIC_DICT]),
+    "s3_output_path": "s3://sagemaker-us-west-2-729646638167/model-customization/eval/base-model-fix-test/",
+    "mlflow_tracking_server_arn": "arn:aws:sagemaker:us-west-2:729646638167:mlflow-app/app-W7FOBBXZANVX",
+    "evaluate_base_model": True,  # This is the key difference - testing base model evaluation
+    "region": "us-west-2",
+}
+
+
+class TestLLMAsJudgeBaseModelFix:
+    """Integration test for base model fix in LLMAsJudgeEvaluator"""
+
+    def test_base_model_evaluation_uses_correct_weights(self):
+        """
+        Test that base model evaluation uses original base model weights.
+        
+        This test verifies the fix for the bug where base model evaluation
+        incorrectly used fine-tuned model weights. The test:
+        
+        1. Creates an evaluator with evaluate_base_model=True
+        2. Starts the evaluation pipeline
+        3. Verifies the pipeline has both EvaluateBaseInferenceModel and 
+           EvaluateCustomInferenceModel steps
+        4. Waits for completion
+        5. Surfaces the results so base and custom model performance can be compared
+        
+        Expected behavior:
+        - EvaluateBaseInferenceModel should use only BaseModelArn (no ModelPackageConfig)
+        - EvaluateCustomInferenceModel should use ModelPackageConfig with SourceModelPackageArn
+        - Results should show different performance between base and custom models
+        """
+        logger.info("=" * 80)
+        logger.info("Testing Base Model Fix: evaluate_base_model=True")
+        logger.info("=" * 80)
+        
+        # Step 1: Create evaluator with evaluate_base_model=True
+        logger.info("Creating LLMAsJudgeEvaluator with evaluate_base_model=True")
+        
+        evaluator = LLMAsJudgeEvaluator(
+            model=TEST_CONFIG["model_package_arn"],
+            evaluator_model=TEST_CONFIG["evaluator_model"],
+            dataset=TEST_CONFIG["dataset_s3_uri"],
+            builtin_metrics=TEST_CONFIG["builtin_metrics"],
+            custom_metrics=TEST_CONFIG["custom_metrics_json"],
+            s3_output_path=TEST_CONFIG["s3_output_path"],
+            evaluate_base_model=TEST_CONFIG["evaluate_base_model"],
+        )
+        
+        # Verify evaluator configuration
+        assert evaluator is not None
+        assert evaluator.evaluate_base_model is True, "evaluate_base_model should be True"
+        
+        logger.info(f"✓ Created evaluator with evaluate_base_model=True")
+        logger.info(f"  Model Package ARN: {evaluator.model}")
+        logger.info(f"  Judge Model: {evaluator.evaluator_model}")
+        
+        # Step 2: Start evaluation
+        logger.info("\nStarting evaluation pipeline...")
+        execution = evaluator.evaluate()
+        
+        # Verify execution was created
+        assert execution is not None
+        assert execution.arn is not None
+        assert execution.name is not None
+        
+        logger.info(f"✓ Pipeline started successfully")
+        logger.info(f"  Execution ARN: {execution.arn}")
+        logger.info(f"  Execution Name: {execution.name}")
+        logger.info(f"  Initial Status: {execution.status.overall_status}")
+        
+        # Step 3: Verify pipeline structure
+        logger.info("\nVerifying pipeline structure...")
+        execution.refresh()
+        
+        # Check that we have both base and custom inference steps
+        step_details = execution.status.step_details or []
+        step_names = [step.name for step in step_details]
+        
+        logger.info(f"Pipeline steps: {step_names}")
+        
+        # Verify both inference steps exist
+        has_base_step = any("BaseInference" in name for name in step_names)
+        has_custom_step = any("CustomInference" in name for name in step_names)
+        
+        assert has_base_step, "Pipeline should have EvaluateBaseInferenceModel step"
+        assert has_custom_step, "Pipeline should have EvaluateCustomInferenceModel step"
+        
+        logger.info(f"✓ Pipeline has both base and custom inference steps")
+        logger.info(f"  Base model step: {'Found' if has_base_step else 'Missing'}")
+        logger.info(f"  Custom model step: {'Found' if has_custom_step else 'Missing'}")
+        
+        # Step 4: Wait for completion
+        logger.info(f"\nWaiting for evaluation to complete...")
+        logger.info(f"  Timeout: {EVALUATION_TIMEOUT_SECONDS}s ({EVALUATION_TIMEOUT_SECONDS//3600}h)")
+        logger.info(f"  Poll interval: 30s")
+        
+        try:
+            execution.wait(target_status="Succeeded", poll=30, timeout=EVALUATION_TIMEOUT_SECONDS)
+            logger.info(f"\n✓ Evaluation completed successfully")
+            logger.info(f"  Final Status: {execution.status.overall_status}")
+            
+            # Verify completion
+            assert execution.status.overall_status == "Succeeded"
+            
+            # Step 5: Analyze results
+            logger.info("\nAnalyzing evaluation results...")
+            
+            # Display results
+            logger.info("  Fetching results (first 10 rows)...")
+            execution.show_results(limit=10, offset=0, show_explanations=False)
+            
+            # Verify S3 output path
+            assert execution.s3_output_path is not None
+            logger.info(f"  Results stored at: {execution.s3_output_path}")
+            
+            # Log step completion details
+            if execution.status.step_details:
+                logger.info("\nStep execution summary:")
+                for step in execution.status.step_details:
+                    logger.info(f"  {step.name}: {step.status}")
+            
+            logger.info("\n" + "=" * 80)
+            logger.info("Base Model Fix Verification: PASSED")
+            logger.info("=" * 80)
+            logger.info("\nKey findings:")
+            logger.info("  ✓ Pipeline created with both base and custom inference steps")
+            logger.info("  ✓ Evaluation completed successfully")
+            logger.info("  ✓ Results available for both base and custom models")
+            logger.info("\nThe fix ensures:")
+            logger.info("  • Base model uses original weights from public hub")
+            logger.info("  • Custom model uses fine-tuned weights from ModelPackageArn")
+            logger.info("  • Users can accurately compare base vs fine-tuned performance")
+            
+        except Exception as e:
+            logger.error(f"\n✗ Evaluation failed or timed out: {e}")
+            logger.error(f"  Final status: {execution.status.overall_status}")
+            
+            if execution.status.failure_reason:
+                logger.error(f"  Failure reason: {execution.status.failure_reason}")
+            
+            # Log step failures
+            if execution.status.step_details:
+                logger.error("\nFailed steps:")
+                for step in execution.status.step_details:
+                    if "failed" in step.status.lower():
+                        logger.error(f"  {step.name}: {step.status}")
+                        if step.failure_reason:
+                            logger.error(f"    Reason: {step.failure_reason}")
+            
+            # Re-raise to fail the test
+            raise
+
+    def test_base_model_false_still_works(self):
+        """
+        Test that evaluate_base_model=False still works correctly (backward compatibility).
+        
+        This test ensures the fix doesn't break existing functionality when
+        evaluate_base_model=False (the default behavior).
+        """
+        logger.info("=" * 80)
+        logger.info("Testing Backward Compatibility: evaluate_base_model=False")
+        logger.info("=" * 80)
+        
+        # Create evaluator with evaluate_base_model=False
+        logger.info("Creating LLMAsJudgeEvaluator with evaluate_base_model=False")
+        
+        evaluator = LLMAsJudgeEvaluator(
+            model=TEST_CONFIG["model_package_arn"],
+            evaluator_model=TEST_CONFIG["evaluator_model"],
+            dataset=TEST_CONFIG["dataset_s3_uri"],
+            builtin_metrics=TEST_CONFIG["builtin_metrics"],
+            s3_output_path=TEST_CONFIG["s3_output_path"],
+            evaluate_base_model=False,  # Only evaluate custom model
+        )
+        
+        # Verify evaluator configuration
+        assert evaluator is not None
+        assert evaluator.evaluate_base_model is False
+        
+        logger.info(f"✓ Created evaluator with evaluate_base_model=False")
+        
+        # Start evaluation
+        logger.info("\nStarting evaluation pipeline...")
+        execution = evaluator.evaluate()
+        
+        assert execution is not None
+        logger.info(f"✓ Pipeline started successfully")
+        logger.info(f"  Execution ARN: {execution.arn}")
+        
+        # Verify pipeline structure - should only have custom inference step
+        execution.refresh()
+        step_details = execution.status.step_details or []
+        step_names = [step.name for step in step_details]
+        
+        logger.info(f"Pipeline steps: {step_names}")
+        
+        # Should NOT have base inference step
+        has_base_step = any("BaseInference" in name for name in step_names)
+        has_custom_step = any("CustomInference" in name for name in step_names)
+        
+        assert not has_base_step, "Pipeline should NOT have EvaluateBaseInferenceModel step when evaluate_base_model=False"
+        assert has_custom_step, "Pipeline should have EvaluateCustomInferenceModel step"
+        
+        logger.info(f"✓ Pipeline structure correct for evaluate_base_model=False")
+        logger.info(f"  Base model step: {'Found (ERROR!)' if has_base_step else 'Not present (correct)'}")
+        logger.info(f"  Custom model step: {'Found (correct)' if has_custom_step else 'Missing (ERROR!)'}")
+        
+        # Wait for completion
+        logger.info(f"\nWaiting for evaluation to complete...")
+        
+        try:
+            execution.wait(target_status="Succeeded", poll=30, timeout=EVALUATION_TIMEOUT_SECONDS)
+            logger.info(f"\n✓ Evaluation completed successfully")
+            
+            assert execution.status.overall_status == "Succeeded"
+            
+            logger.info("\n" + "=" * 80)
+            logger.info("Backward Compatibility Test: PASSED")
+            logger.info("=" * 80)
+            
+        except Exception as e:
+            logger.error(f"\n✗ Evaluation failed: {e}")
+            raise