Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions auto_round/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,9 @@
from auto_round.compressors.base import BaseCompressor
from auto_round.eval.eval_cli import EvalArgumentParser, eval, eval_task_by_task
from auto_round.eval.evaluation import run_model_evaluation
from auto_round.schemes import PRESET_SCHEMES, preset_name_to_scheme
from auto_round.schemes import PRESET_SCHEMES
from auto_round.utils import (
clear_memory,
get_device_and_parallelism,
get_model_dtype,
parse_layer_config_arg,
)

Expand Down
4 changes: 1 addition & 3 deletions auto_round/algorithms/quantization/adam_round/adam.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union

import torch

from auto_round.algorithms.quantization.sign_round.quantizer import SignRoundQuantizer
from auto_round.schemes import QuantizationScheme
from auto_round.utils import check_is_cpu, htcore, is_hpex_available
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager


class AdamRoundQuantizer(SignRoundQuantizer):
Expand Down
2 changes: 1 addition & 1 deletion auto_round/algorithms/quantization/awq/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
set_amax_for_all_moe_layers,
set_module,
)
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager
from auto_round.wrapper import WrapperLinear
from auto_round.wrapper import WrapperMultiblock as _WrapperMultiblock

Expand Down
2 changes: 1 addition & 1 deletion auto_round/algorithms/quantization/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
get_module,
set_module,
)
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager
from auto_round.wrapper import WrapperLinear


Expand Down
11 changes: 3 additions & 8 deletions auto_round/algorithms/quantization/sign_round/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from collections import defaultdict
from contextlib import nullcontext
from functools import partial
from typing import Any, Callable, Optional, Union

import accelerate
import torch
from torch import autocast

Expand All @@ -26,12 +23,10 @@
from auto_round.algorithms.quantization.sign_round.sign_sgd import SignSGD
from auto_round.compressors.utils import (
IndexSampler,
block_forward,
check_need_act_calibration,
collect_best_params,
immediate_pack,
)
from auto_round.data_type.utils import reshape_pad_tensor_by_group_size, update_fused_layer_global_scales
from auto_round.data_type.utils import update_fused_layer_global_scales
from auto_round.logger import logger
from auto_round.utils import (
get_module,
Expand All @@ -42,8 +37,8 @@
set_module,
to_device,
)
from auto_round.utils.device import clear_memory_if_reached_threshold
from auto_round.utils.device_manager import device_manager
from auto_round.devices.utils import clear_memory_if_reached_threshold
from auto_round.devices.device_manager_haha import device_manager
from auto_round.utils.distributed import setup_ddp_if_needed_
from auto_round.wrapper import WrapperLinear, unwrapper_block, unwrapper_layer, wrapper_block

Expand Down
6 changes: 3 additions & 3 deletions auto_round/auto_scheme/delta_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
to_device,
to_dtype,
)
from auto_round.utils.device import MemoryMonitor
from auto_round.utils.device_manager import get_current_device_manager
from auto_round.devices.utils import MemoryMonitor
from auto_round.devices.device_manager_haha import get_current_ar_device
from auto_round.utils.offload import OffloadManager
from auto_round.wrapper import WrapperLinear

Expand Down Expand Up @@ -442,7 +442,7 @@ def backward_pre_hook(module, grad_input):
"""Hook executed before backward propagation."""
global last_grad_input
last_grad_input = grad_input
get_current_device_manager().synchronize()
get_current_ar_device().synchronize()
raise MyCustomError("Interrupt backward pass")

for data in dataloader:
Expand Down
2 changes: 1 addition & 1 deletion auto_round/calibration/diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from auto_round.calibration.llm import LLMCalibrator
from auto_round.calibration.register import register_calibrator
from auto_round.logger import logger
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager
from auto_round.utils.model import wrap_block_forward_positional_to_kwargs


Expand Down
2 changes: 1 addition & 1 deletion auto_round/calibration/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import torch

from auto_round.utils import clear_memory, to_device, to_dtype
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager

__all__ = ["split_inputs", "preprocess_block_inputs"]

Expand Down
4 changes: 2 additions & 2 deletions auto_round/calibration/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
to_device,
to_dtype,
)
from auto_round.utils.device import parse_available_devices
from auto_round.utils.device_manager import device_manager
from auto_round.devices.utils import parse_available_devices
from auto_round.devices.device_manager_haha import device_manager


@register_calibrator("llm")
Expand Down
8 changes: 3 additions & 5 deletions auto_round/compressors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import copy
import gc
import os
import sys
from dataclasses import asdict, dataclass, fields
from typing import Any, Optional, Union

Expand All @@ -40,7 +39,7 @@
get_gguf_scheme,
preset_name_to_scheme,
)
from auto_round.special_model_handler import get_predefined_fixed_attr, get_predefined_ignore_layers, update_module
from auto_round.special_model_handler import get_predefined_fixed_attr, get_predefined_ignore_layers
from auto_round.utils import (
AUDIO_MM_KEYS,
INNER_SUPPORTED_LAYER_TYPES,
Expand All @@ -60,12 +59,12 @@
preserve_original_visual_block_name,
revert_checkpoint_conversion_mapping,
)
from auto_round.utils.device import (
from auto_round.devices.utils import (
_force_trim_malloc,
patch_xpu_sdpa_drop_causal_mask,
set_non_auto_device_map,
)
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager
from auto_round.utils.offload import OffloadManager


Expand Down Expand Up @@ -283,7 +282,6 @@ def __init__(

if is_hpex_available():
logger.info("habana_frameworks is available, import htcore explicitly.")
import habana_frameworks.torch.core as htcore # pylint: disable=E0401

# Reset both context singletons before creating fresh instances so that
# consecutive AutoRound creations don't inherit stale config from earlier ones.
Expand Down
26 changes: 7 additions & 19 deletions auto_round/compressors/data_driven.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,58 +16,46 @@
import time
import traceback
from functools import partial
from typing import Any, Callable, Optional, Union
from typing import Any, Callable, Union

import accelerate
import torch
from accelerate.big_modeling import dispatch_model, infer_auto_device_map
from accelerate.utils import get_balanced_memory, get_max_memory
from accelerate.big_modeling import dispatch_model
from tqdm import tqdm

from auto_round import envs
from auto_round.algorithms.alg_config import AlgConfig
from auto_round.calibration.utils import (
_infer_last_cache_name,
_split_inputs_diffusion,
_update_inputs,
)
from auto_round.compressors.base import BaseCompressor
from auto_round.compressors.utils import (
_get_quantized_layer_names_outside_blocks,
check_skippable_keywords,
immediate_pack,
init_cache,
is_nv_fp,
is_static_wfp8afp8,
reset_params,
)
from auto_round.logger import logger
from auto_round.modeling.fused_moe.replace_modules import materialize_model_, safe_to_cpu_
from auto_round.utils import (
SUPPORTED_LAYER_TYPES,
check_seqlen_compatible,
check_to_quantized,
clear_memory,
compress_layer_names,
convert_module_to_hp_if_necessary,
flatten_list,
get_block_names,
get_module,
hook_ngram_embeddings_on_cpu,
is_auto_device_mapping,
is_quantized_input_module,
memory_monitor,
mv_module_from_gpu,
set_amax_for_all_moe_layers,
to_device,
to_dtype,
wrap_block_forward_positional_to_kwargs,
)
from auto_round.utils.device import (
from auto_round.devices.utils import (
_force_trim_malloc,
parse_available_devices,
)
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager
from auto_round.wrapper import WrapperMultiblock


Expand Down Expand Up @@ -344,7 +332,7 @@ def quantize_block(
and len(device_manager.device_list) > 1
and not self.model_context.is_diffusion
):
from auto_round.utils.device import set_auto_device_map_for_block_with_tuning
from auto_round.devices.utils import set_auto_device_map_for_block_with_tuning

card_0_in_high_risk, loss_device = set_auto_device_map_for_block_with_tuning(
block,
Expand Down Expand Up @@ -495,7 +483,7 @@ def _quantize_blocks(
and len(device_manager.device_list) > 1
and not self.model_context.is_diffusion
):
from auto_round.utils.device import set_auto_device_map_for_block_with_tuning
from auto_round.devices.utils import set_auto_device_map_for_block_with_tuning

card_0_in_high_risk, loss_device = set_auto_device_map_for_block_with_tuning(
m,
Expand Down Expand Up @@ -1000,7 +988,7 @@ def process_input_others(input_others):
and len(device_manager.device_list) > 1
and not self.model_context.is_diffusion
):
from auto_round.utils.device import set_auto_device_map_for_block_with_tuning
from auto_round.devices.utils import set_auto_device_map_for_block_with_tuning

set_auto_device_map_for_block_with_tuning(
block,
Expand Down
5 changes: 2 additions & 3 deletions auto_round/compressors/diffusion_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,18 @@
# limitations under the License.
import inspect
import os
from typing import Union

import torch
from tqdm import tqdm

from auto_round.logger import logger
from auto_round.utils import clear_memory
from auto_round.utils.device import (
from auto_round.devices.utils import (
dispatch_model_block_wise,
dispatch_model_by_all_available_devices,
get_major_device,
)
from auto_round.utils.device_manager import device_manager, is_auto_device_mapping
from auto_round.devices.device_manager_haha import device_manager, is_auto_device_mapping
from auto_round.utils.model import rename_weights_files


Expand Down
3 changes: 1 addition & 2 deletions auto_round/compressors/model_free.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,10 @@

import torch

from auto_round import envs
from auto_round.logger import logger
from auto_round.schemes import PRESET_SCHEMES, QuantizationScheme, preset_name_to_scheme
from auto_round.utils.common import AUDIO_MM_KEYS, VISION_MM_KEYS, compress_layer_names, to_standard_regex
from auto_round.utils.device import clear_memory, memory_monitor
from auto_round.devices.utils import clear_memory, memory_monitor
from auto_round.utils.missing_tensors import quantize_weight_rtn, split_fused_expert_tensors

# ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion auto_round/compressors/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
get_module,
to_standard_regex,
)
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager

if TYPE_CHECKING:
from auto_round.schemes import QuantizationScheme
Expand Down
9 changes: 3 additions & 6 deletions auto_round/context/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Callable, Optional, Union
from typing import Optional, Union

import torch

from auto_round.context.base import BaseContext
from auto_round.utils.device import (
from auto_round.devices.utils import (
clear_memory,
clear_memory_if_reached_threshold,
set_auto_device_map_for_block_with_tuning,
set_non_auto_device_map,
)
from auto_round.utils.device_manager import device_manager
from auto_round.devices.device_manager_haha import device_manager

__all__ = ["CompressContext"]

Expand Down
6 changes: 3 additions & 3 deletions auto_round/context/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import gc
import importlib
from typing import Any, Callable, Optional, Union
from typing import Optional

import torch
from packaging import version
Expand All @@ -37,8 +37,8 @@
mllm_load_model,
unsupported_meta_device,
)
from auto_round.utils.device import _force_trim_malloc
from auto_round.utils.device_manager import device_manager, get_ar_device
from auto_round.devices.utils import _force_trim_malloc
from auto_round.devices.device_manager_haha import device_manager, get_ar_device

__all__ = ["ModelContext"]

Expand Down
8 changes: 4 additions & 4 deletions auto_round/data_type/gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Callable, Union
from typing import Union

import torch

from auto_round.data_type.register import register_dtype
from auto_round.data_type.utils import reshape_pad_tensor_by_group_size, revert_tensor_by_pad, round_ste
from auto_round.export.export_to_gguf.config import GGML_QUANT_SIZES
from auto_round.export.export_to_gguf.packing import make_q3_quants, make_qx_quants, make_qx_quants_chunk
from auto_round.export.export_to_gguf.packing import make_q3_quants, make_qx_quants_chunk
from auto_round.logger import logger
from auto_round.utils import get_reciprocal
from auto_round.utils.device import clear_memory
from auto_round.devices.utils import clear_memory


@register_dtype("int_sym_dq")
Expand Down Expand Up @@ -789,7 +789,7 @@ def quant_tensor_gguf_sym_dq(
Quantized and de-quantized tensor, scale, zero-point
"""

from auto_round.export.export_to_gguf.config import K_SCALE_SIZE, QK_K
from auto_round.export.export_to_gguf.config import QK_K

if bits not in [3, 6]:
raise KeyError(f"bits={bits} is not supported by gguf_int_sym_dq, please check.")
Expand Down
1 change: 1 addition & 0 deletions auto_round/devices/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from auto_round.devices.device_manager_haha import device_manager
Loading