intel · wenhuach21 · Jun 9, 2026
diff --git a/auto_round/__main__.py b/auto_round/__main__.py
@@ -23,11 +23,9 @@
 from auto_round.compressors.base import BaseCompressor
 from auto_round.eval.eval_cli import EvalArgumentParser, eval, eval_task_by_task
 from auto_round.eval.evaluation import run_model_evaluation
-from auto_round.schemes import PRESET_SCHEMES, preset_name_to_scheme
+from auto_round.schemes import PRESET_SCHEMES
 from auto_round.utils import (
     clear_memory,
-    get_device_and_parallelism,
-    get_model_dtype,
     parse_layer_config_arg,
 )
 

diff --git a/auto_round/algorithms/quantization/adam_round/adam.py b/auto_round/algorithms/quantization/adam_round/adam.py
@@ -11,14 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Union
 
 import torch
 
 from auto_round.algorithms.quantization.sign_round.quantizer import SignRoundQuantizer
-from auto_round.schemes import QuantizationScheme
 from auto_round.utils import check_is_cpu, htcore, is_hpex_available
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 
 
 class AdamRoundQuantizer(SignRoundQuantizer):

diff --git a/auto_round/algorithms/quantization/awq/quantizer.py b/auto_round/algorithms/quantization/awq/quantizer.py
@@ -60,7 +60,7 @@
     set_amax_for_all_moe_layers,
     set_module,
 )
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 from auto_round.wrapper import WrapperLinear
 from auto_round.wrapper import WrapperMultiblock as _WrapperMultiblock
 

diff --git a/auto_round/algorithms/quantization/base.py b/auto_round/algorithms/quantization/base.py
@@ -36,7 +36,7 @@
     get_module,
     set_module,
 )
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 from auto_round.wrapper import WrapperLinear
 
 

diff --git a/auto_round/algorithms/quantization/sign_round/quantizer.py b/auto_round/algorithms/quantization/sign_round/quantizer.py
@@ -12,12 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import copy
-from collections import defaultdict
 from contextlib import nullcontext
-from functools import partial
 from typing import Any, Callable, Optional, Union
 
-import accelerate
 import torch
 from torch import autocast
 
@@ -26,12 +23,10 @@
 from auto_round.algorithms.quantization.sign_round.sign_sgd import SignSGD
 from auto_round.compressors.utils import (
     IndexSampler,
-    block_forward,
     check_need_act_calibration,
     collect_best_params,
-    immediate_pack,
 )
-from auto_round.data_type.utils import reshape_pad_tensor_by_group_size, update_fused_layer_global_scales
+from auto_round.data_type.utils import update_fused_layer_global_scales
 from auto_round.logger import logger
 from auto_round.utils import (
     get_module,
@@ -42,8 +37,8 @@
     set_module,
     to_device,
 )
-from auto_round.utils.device import clear_memory_if_reached_threshold
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.utils import clear_memory_if_reached_threshold
+from auto_round.devices.device_manager_haha import device_manager
 from auto_round.utils.distributed import setup_ddp_if_needed_
 from auto_round.wrapper import WrapperLinear, unwrapper_block, unwrapper_layer, wrapper_block
 

diff --git a/auto_round/auto_scheme/delta_loss.py b/auto_round/auto_scheme/delta_loss.py
@@ -58,8 +58,8 @@
     to_device,
     to_dtype,
 )
-from auto_round.utils.device import MemoryMonitor
-from auto_round.utils.device_manager import get_current_device_manager
+from auto_round.devices.utils import MemoryMonitor
+from auto_round.devices.device_manager_haha import get_current_ar_device
 from auto_round.utils.offload import OffloadManager
 from auto_round.wrapper import WrapperLinear
 
@@ -442,7 +442,7 @@ def backward_pre_hook(module, grad_input):
         """Hook executed before backward propagation."""
         global last_grad_input
         last_grad_input = grad_input
-        get_current_device_manager().synchronize()
+        get_current_ar_device().synchronize()
         raise MyCustomError("Interrupt backward pass")
 
     for data in dataloader:

diff --git a/auto_round/calibration/diffusion.py b/auto_round/calibration/diffusion.py
@@ -27,7 +27,7 @@
 from auto_round.calibration.llm import LLMCalibrator
 from auto_round.calibration.register import register_calibrator
 from auto_round.logger import logger
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 from auto_round.utils.model import wrap_block_forward_positional_to_kwargs
 
 

diff --git a/auto_round/calibration/inputs.py b/auto_round/calibration/inputs.py
@@ -18,7 +18,7 @@
 import torch
 
 from auto_round.utils import clear_memory, to_device, to_dtype
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 
 __all__ = ["split_inputs", "preprocess_block_inputs"]
 

diff --git a/auto_round/calibration/llm.py b/auto_round/calibration/llm.py
@@ -41,8 +41,8 @@
     to_device,
     to_dtype,
 )
-from auto_round.utils.device import parse_available_devices
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.utils import parse_available_devices
+from auto_round.devices.device_manager_haha import device_manager
 
 
 @register_calibrator("llm")

diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
@@ -14,7 +14,6 @@
 import copy
 import gc
 import os
-import sys
 from dataclasses import asdict, dataclass, fields
 from typing import Any, Optional, Union
 
@@ -40,7 +39,7 @@
     get_gguf_scheme,
     preset_name_to_scheme,
 )
-from auto_round.special_model_handler import get_predefined_fixed_attr, get_predefined_ignore_layers, update_module
+from auto_round.special_model_handler import get_predefined_fixed_attr, get_predefined_ignore_layers
 from auto_round.utils import (
     AUDIO_MM_KEYS,
     INNER_SUPPORTED_LAYER_TYPES,
@@ -60,12 +59,12 @@
     preserve_original_visual_block_name,
     revert_checkpoint_conversion_mapping,
 )
-from auto_round.utils.device import (
+from auto_round.devices.utils import (
     _force_trim_malloc,
     patch_xpu_sdpa_drop_causal_mask,
     set_non_auto_device_map,
 )
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 from auto_round.utils.offload import OffloadManager
 
 
@@ -283,7 +282,6 @@ def __init__(
 
         if is_hpex_available():
             logger.info("habana_frameworks is available, import htcore explicitly.")
-            import habana_frameworks.torch.core as htcore  # pylint: disable=E0401
 
         # Reset both context singletons before creating fresh instances so that
         # consecutive AutoRound creations don't inherit stale config from earlier ones.

diff --git a/auto_round/compressors/data_driven.py b/auto_round/compressors/data_driven.py
@@ -16,58 +16,46 @@
 import time
 import traceback
 from functools import partial
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Union
 
 import accelerate
 import torch
-from accelerate.big_modeling import dispatch_model, infer_auto_device_map
-from accelerate.utils import get_balanced_memory, get_max_memory
+from accelerate.big_modeling import dispatch_model
 from tqdm import tqdm
 
-from auto_round import envs
 from auto_round.algorithms.alg_config import AlgConfig
 from auto_round.calibration.utils import (
-    _infer_last_cache_name,
-    _split_inputs_diffusion,
     _update_inputs,
 )
 from auto_round.compressors.base import BaseCompressor
 from auto_round.compressors.utils import (
     _get_quantized_layer_names_outside_blocks,
-    check_skippable_keywords,
     immediate_pack,
-    init_cache,
     is_nv_fp,
     is_static_wfp8afp8,
-    reset_params,
 )
 from auto_round.logger import logger
 from auto_round.modeling.fused_moe.replace_modules import materialize_model_, safe_to_cpu_
 from auto_round.utils import (
     SUPPORTED_LAYER_TYPES,
-    check_seqlen_compatible,
     check_to_quantized,
     clear_memory,
     compress_layer_names,
     convert_module_to_hp_if_necessary,
     flatten_list,
     get_block_names,
     get_module,
-    hook_ngram_embeddings_on_cpu,
     is_auto_device_mapping,
-    is_quantized_input_module,
     memory_monitor,
     mv_module_from_gpu,
     set_amax_for_all_moe_layers,
     to_device,
     to_dtype,
-    wrap_block_forward_positional_to_kwargs,
 )
-from auto_round.utils.device import (
+from auto_round.devices.utils import (
     _force_trim_malloc,
-    parse_available_devices,
 )
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 from auto_round.wrapper import WrapperMultiblock
 
 
@@ -344,7 +332,7 @@ def quantize_block(
                     and len(device_manager.device_list) > 1
                     and not self.model_context.is_diffusion
                 ):
-                    from auto_round.utils.device import set_auto_device_map_for_block_with_tuning
+                    from auto_round.devices.utils import set_auto_device_map_for_block_with_tuning
 
                     card_0_in_high_risk, loss_device = set_auto_device_map_for_block_with_tuning(
                         block,
@@ -495,7 +483,7 @@ def _quantize_blocks(
                 and len(device_manager.device_list) > 1
                 and not self.model_context.is_diffusion
             ):
-                from auto_round.utils.device import set_auto_device_map_for_block_with_tuning
+                from auto_round.devices.utils import set_auto_device_map_for_block_with_tuning
 
                 card_0_in_high_risk, loss_device = set_auto_device_map_for_block_with_tuning(
                     m,
@@ -1000,7 +988,7 @@ def process_input_others(input_others):
                     and len(device_manager.device_list) > 1
                     and not self.model_context.is_diffusion
                 ):
-                    from auto_round.utils.device import set_auto_device_map_for_block_with_tuning
+                    from auto_round.devices.utils import set_auto_device_map_for_block_with_tuning
 
                     set_auto_device_map_for_block_with_tuning(
                         block,

diff --git a/auto_round/compressors/diffusion_mixin.py b/auto_round/compressors/diffusion_mixin.py
@@ -13,19 +13,18 @@
 # limitations under the License.
 import inspect
 import os
-from typing import Union
 
 import torch
 from tqdm import tqdm
 
 from auto_round.logger import logger
 from auto_round.utils import clear_memory
-from auto_round.utils.device import (
+from auto_round.devices.utils import (
     dispatch_model_block_wise,
     dispatch_model_by_all_available_devices,
     get_major_device,
 )
-from auto_round.utils.device_manager import device_manager, is_auto_device_mapping
+from auto_round.devices.device_manager_haha import device_manager, is_auto_device_mapping
 from auto_round.utils.model import rename_weights_files
 
 

diff --git a/auto_round/compressors/model_free.py b/auto_round/compressors/model_free.py
@@ -73,11 +73,10 @@
 
 import torch
 
-from auto_round import envs
 from auto_round.logger import logger
 from auto_round.schemes import PRESET_SCHEMES, QuantizationScheme, preset_name_to_scheme
 from auto_round.utils.common import AUDIO_MM_KEYS, VISION_MM_KEYS, compress_layer_names, to_standard_regex
-from auto_round.utils.device import clear_memory, memory_monitor
+from auto_round.devices.utils import clear_memory, memory_monitor
 from auto_round.utils.missing_tensors import quantize_weight_rtn, split_fused_expert_tensors
 
 # ---------------------------------------------------------------------------

diff --git a/auto_round/compressors/utils.py b/auto_round/compressors/utils.py
@@ -35,7 +35,7 @@
     get_module,
     to_standard_regex,
 )
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 
 if TYPE_CHECKING:
     from auto_round.schemes import QuantizationScheme

diff --git a/auto_round/context/compress.py b/auto_round/context/compress.py
@@ -11,18 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, Optional, Union
+from typing import Optional, Union
 
 import torch
 
 from auto_round.context.base import BaseContext
-from auto_round.utils.device import (
+from auto_round.devices.utils import (
     clear_memory,
-    clear_memory_if_reached_threshold,
-    set_auto_device_map_for_block_with_tuning,
-    set_non_auto_device_map,
 )
-from auto_round.utils.device_manager import device_manager
+from auto_round.devices.device_manager_haha import device_manager
 
 __all__ = ["CompressContext"]
 

diff --git a/auto_round/context/model.py b/auto_round/context/model.py
@@ -14,7 +14,7 @@
 
 import gc
 import importlib
-from typing import Any, Callable, Optional, Union
+from typing import Optional
 
 import torch
 from packaging import version
@@ -37,8 +37,8 @@
     mllm_load_model,
     unsupported_meta_device,
 )
-from auto_round.utils.device import _force_trim_malloc
-from auto_round.utils.device_manager import device_manager, get_ar_device
+from auto_round.devices.utils import _force_trim_malloc
+from auto_round.devices.device_manager_haha import device_manager, get_ar_device
 
 __all__ = ["ModelContext"]
 

diff --git a/auto_round/data_type/gguf.py b/auto_round/data_type/gguf.py
@@ -11,17 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, Union
+from typing import Union
 
 import torch
 
 from auto_round.data_type.register import register_dtype
 from auto_round.data_type.utils import reshape_pad_tensor_by_group_size, revert_tensor_by_pad, round_ste
 from auto_round.export.export_to_gguf.config import GGML_QUANT_SIZES
-from auto_round.export.export_to_gguf.packing import make_q3_quants, make_qx_quants, make_qx_quants_chunk
+from auto_round.export.export_to_gguf.packing import make_q3_quants, make_qx_quants_chunk
 from auto_round.logger import logger
 from auto_round.utils import get_reciprocal
-from auto_round.utils.device import clear_memory
+from auto_round.devices.utils import clear_memory
 
 
 @register_dtype("int_sym_dq")
@@ -789,7 +789,7 @@ def quant_tensor_gguf_sym_dq(
         Quantized and de-quantized tensor, scale, zero-point
     """
 
-    from auto_round.export.export_to_gguf.config import K_SCALE_SIZE, QK_K
+    from auto_round.export.export_to_gguf.config import QK_K
 
     if bits not in [3, 6]:
         raise KeyError(f"bits={bits} is not supported by gguf_int_sym_dq, please check.")

diff --git a/auto_round/devices/__init__.py b/auto_round/devices/__init__.py
@@ -0,0 +1 @@
+from auto_round.devices.device_manager_haha import device_manager