|
32 | 32 | import uuid |
33 | 33 | from typing import Callable, Generator, Optional, Union, Tuple, List, Sequence, Mapping, Any, \ |
34 | 34 | Iterator, Iterable, KeysView, ItemsView, ValuesView, Dict, overload |
35 | | -from dataclasses import dataclass |
| 35 | +from dataclasses import dataclass, field |
36 | 36 | from enum import IntFlag |
37 | 37 |
|
38 | 38 | import collections |
@@ -526,6 +526,88 @@ def view(self) -> 'BinaryView': |
526 | 526 | return self._view |
527 | 527 |
|
528 | 528 |
|
@dataclass
class StringDetectionParameters:
    """
    Options controlling raw string detection, as used by the core strings analysis.

    Each field corresponds to one of the standard string-analysis settings read by
    :py:meth:`from_settings`.
    """
    # Read from ``analysis.limits.minStringLength`` by from_settings
    min_string_length: int = 4
    # Read from ``analysis.unicode.utf8`` by from_settings
    utf8_enabled: bool = True
    # Read from ``analysis.unicode.utf16`` by from_settings
    utf16_enabled: bool = True
    # Read from ``analysis.unicode.utf32`` by from_settings
    utf32_enabled: bool = True
    # Read from ``analysis.unicode.blocks`` by from_settings; each instance gets its own empty list by default
    unicode_block_names: List[str] = field(default_factory=list)

    @classmethod
    def from_settings(
        cls, settings_obj: Optional['settings.Settings'] = None, view: Optional['BinaryView'] = None
    ) -> 'StringDetectionParameters':
        """
        ``from_settings`` creates parameters populated from the standard string-analysis settings:
        ``analysis.limits.minStringLength`` and ``analysis.unicode.{blocks,utf8,utf16,utf32}``.

        :param settings_obj: Settings object to query; a new ``settings.Settings()`` is created when omitted
        :param view: Passed through as the second argument of every settings lookup
        :return: Parameters holding the values returned by the settings lookups
        """
        source = settings.Settings() if settings_obj is None else settings_obj

        # Lookups are performed in field order, one per field.
        min_length = source.get_integer("analysis.limits.minStringLength", view)
        utf8 = source.get_bool("analysis.unicode.utf8", view)
        utf16 = source.get_bool("analysis.unicode.utf16", view)
        utf32 = source.get_bool("analysis.unicode.utf32", view)
        blocks = source.get_string_list("analysis.unicode.blocks", view)

        return cls(
            min_string_length=min_length,
            utf8_enabled=utf8,
            utf16_enabled=utf16,
            utf32_enabled=utf32,
            unicode_block_names=blocks,
        )
| 555 | + |
| 556 | + |
@dataclass(frozen=True)
class DetectedString:
    """
    One string reported by :py:func:`detect_strings_in_block`.

    ``start`` is relative to the ``base_address`` that was passed to the detector, and ``length``
    is measured in bytes. Instances are immutable (the dataclass is frozen).
    """
    # Kind of string, as a StringType value
    type: StringType
    # Position of the string, relative to the detector's ``base_address``
    start: int
    # Size of the string in bytes
    length: int
| 564 | + |
| 565 | + |
def detect_strings_in_block(
    data: bytes, base_address: int = 0, parameters: Optional[StringDetectionParameters] = None
) -> List[DetectedString]:
    """
    ``detect_strings_in_block`` scans a raw data buffer for strings with the same detection logic
    as the core strings analysis. In contrast to :py:meth:`BinaryView.get_strings`, the buffer does
    not have to belong to a BinaryView.

    :param data: Buffer to scan
    :param base_address: Address reported for offset 0 of ``data``
    :param parameters: Detection parameters; when omitted they are taken from the current global settings
    :return: The strings detected
    """
    options = StringDetectionParameters.from_settings() if parameters is None else parameters

    # Mirror the Python-side options into the core parameter structure.
    native = core.BNStringDetectionParameters()
    native.minStringLength = options.min_string_length
    native.utf8Enabled = options.utf8_enabled
    native.utf16Enabled = options.utf16_enabled
    native.utf32Enabled = options.utf32_enabled

    # C array of block names (converted with core.cstr) handed to the core alongside its length.
    name_array = (ctypes.c_char_p * len(options.unicode_block_names))()
    for index, block_name in enumerate(options.unicode_block_names):
        name_array[index] = core.cstr(block_name)
    native.unicodeBlockNames = ctypes.cast(name_array, ctypes.POINTER(ctypes.c_char_p))
    native.unicodeBlockNameCount = len(options.unicode_block_names)

    detector = core.BNCreateStringDetector(native)
    assert detector is not None, "core.BNCreateStringDetector returned None"
    try:
        # The core receives a copy of ``data`` as an unsigned-byte array.
        raw = (ctypes.c_ubyte * len(data)).from_buffer_copy(data)
        found = ctypes.c_ulonglong()
        refs = core.BNStringDetectorDetectStrings(detector, raw, len(data), len(data), base_address, None, found)
        assert refs is not None, "core.BNStringDetectorDetectStrings returned None"
        try:
            # Convert every native entry to a DetectedString before the native list is freed.
            return [
                DetectedString(StringType(ref.type), ref.start, ref.length)
                for ref in (refs[i] for i in range(found.value))
            ]
        finally:
            core.BNFreeStringReferenceList(refs)
    finally:
        core.BNFreeStringDetector(detector)
| 609 | + |
| 610 | + |
529 | 611 | class StringRef: |
530 | 612 | """Deduplicated reference to a string owned by the Binary Ninja core. Use `str` or `bytes` to convert |
531 | 613 | this to a standard Python string or sequence of bytes.""" |
|
0 commit comments