Automatic Tile Splitting

Report · Aug 28, 2023

Hello friends, I have nearly 300 pages of png files. How do I automatically sort them into boxes? The location of the texts in each file changes.

I want sample file

Report · Aug 28, 2023

Please explain what you actually mean and provide a couple of sample files (as is and the corresponding intended result).

Why are you working with png anyway?

Editing text-heavy files should ideally be done with vector-files that contain text-data.

Report · Aug 28, 2023

I want to extract the texts from the file in boxes. Because I know how to use photoshop.

I have the files

I want it this way

I want it to be automatic

Report · Aug 28, 2023

Again: Please provide a couple of sample files (as is and the corresponding intended result).

• Not just one original image (if »Ekran Resmi 2023-08-28 17-1.55.46.png« even is the original image and not a screenshot) and a bunch of miscellaneous ones (paragraphs 2 and 4 seem to have no connection to »Ekran Resmi 2023-08-28 17-1.55.46.png«).

• Do you only want to extract the paragraph number 12 from »Ekran Resmi 2023-08-28 17-1.55.46.png«?

Do you have access to the vector/type data even if you prefer the pixel data?

Report · Aug 28, 2023

I want to shred main file like this

files (more professionally of course)

Report · Aug 28, 2023

On the two files you posted, this script works perfectly. How it will work with others - I do not know ¯\_(ツ)_/¯

// Action Manager wrappers (see AM below); the string names the class each wrapper targets.
var apl = new AM('application'),
    doc = new AM('document'),
    lr = new AM('layer'),
    // Filled in by getPixelStrip() with the [top, bottom] pairs returned by findTiles().
    tiles = [];
// Tuning parameters: THRESHOLD_LEVEL is used by trim(); MINIMUM_RADIUS and
// BLUR_DISTANCE are used by getPixelStrip().
const THRESHOLD_LEVEL = 80,
    MINIMUM_RADIUS = 30,
    BLUR_DISTANCE = 200;
try {
    // Do nothing unless at least one document is open.
    if (apl.getProperty('numberOfDocuments')) {
        // NOTE(review): the script body is an empty function, so this call appears to exist
        // only to record a 'Tile Splitting' history state - confirm.
        activeDocument.suspendHistory('Tile Splitting', 'function () {}');
        // Run trim() as a single history step.
        activeDocument.suspendHistory('Trim', 'trim()');
        // Size is read after trim() has run. docH is also read by getPixelStrip().
        // NOTE(review): the "* docRes / 72" factor presumably converts the reported size
        // to pixels - confirm the unit getProperty returns for width/height.
        var docRes = doc.getProperty('resolution'),
            docW = doc.getProperty('width') * docRes / 72,
            docH = doc.getProperty('height') * docRes / 72;
        // Run getPixelStrip() as a single history step; it assigns the global `tiles`.
        activeDocument.suspendHistory('Save strip', 'getPixelStrip()');
        // Deletes the current history state (see AM.stepBack).
        // NOTE(review): presumably this discards the 'Save strip' step - confirm.
        doc.stepBack();
        if (tiles.length) {
            // Output files are named "<title without extension>-<n>.png" and are written
            // to the parent folder of the document's file reference.
            var title = doc.getProperty('title').replace(/\.[0-9a-z]+$/i, '') + '-',
                pth = doc.getProperty('fileReference').parent;
            for (var i = 0; i < tiles.length; i++) {
                // Work on a duplicate named after the 1-based tile number.
                doc.duplicate(i + 1);
                // makeSelection takes (top, left, bottom, right): select the tile's rows
                // across the full width and crop to them.
                doc.makeSelection(tiles[i][0], 0, tiles[i][1], docW);
                doc.crop();
                // On a temporary layer copy: apply levels, load the RGB channel as a
                // selection and invert it, then read the selection's bounds.
                lr.copyToLayer();
                doc.levels([128, 220]);
                doc.selectionFromChannel('RGB');
                doc.inverseSelection();
                // 'selection' is an object-typed property, so getProperty returns
                // { type, value } and .value is the descriptor holding the bounds.
                var bounds = doc.getProperty('selection').value;
                lr.deleteLayer();
                // Crop again to the measured top/bottom, widened by 2 px on each side.
                doc.makeSelection(bounds.getDouble(stringIDToTypeID('top')) - 2, 0, bounds.getDouble(stringIDToTypeID('bottom')) + 2, docW);
                doc.crop();
                // Save a PNG copy, then close the duplicate without saving it.
                doc.saveToPNG(title + (i + 1), pth);
                doc.close('no');
            }
        }
        // Deletes the current history state once more.
        // NOTE(review): presumably this discards the 'Trim' step - confirm.
        doc.stepBack();
    }
} catch (e) { alert('A lot of things can go wrong in this script. :(\n\n' + e) }
// Trims the document's edges with the help of a temporary, thresholded layer copy.
// Invoked by name through activeDocument.suspendHistory('Trim', 'trim()').
function trim() {
    // Temporary layer: it is deleted again at the end of this function.
    lr.copyToLayer();
    // Invert, then threshold at THRESHOLD_LEVEL before trimming.
    lr.invert();
    lr.threshold(THRESHOLD_LEVEL);
    // Trim based on the top-left pixel colour; the four flags are top, bottom, left, right.
    lr.trim('topLeftPixelColor', 1, 1, 1, 1);
    lr.deleteLayer();
}
// Reduces the document to a 1 px wide grayscale column, dumps it as RAW bytes to a
// temporary file and stores the tile boundaries found in it in the global `tiles`.
// Invoked by name through activeDocument.suspendHistory('Save strip', 'getPixelStrip()').
function getPixelStrip() {
    // Phase 1: filter a temporary layer copy (Minimum filter, motion blur, levels).
    lr.copyToLayer();
    lr.filterMinimum(MINIMUM_RADIUS, 'squareness');
    lr.motionBlur(0, BLUR_DISTANCE);
    lr.levels([195, 220]);

    // Phase 2: keep only the left-most column. Arguments are (top, left, bottom, right).
    doc.makeSelection(0, 0, docH, 1);
    doc.flatten();
    doc.crop();
    doc.convertToGrayscale();

    // Phase 3: write the column as RAW and let findTiles() read it back.
    var rawPath = Folder.temp + '/colors.raw';
    var rawFile = new File(rawPath);
    doc.saveToRAW(rawFile);
    tiles = findTiles(rawFile);
}
/**
 * Reads a RAW dump of a 1 px wide grayscale column and returns the vertical
 * extents of its dark runs.
 *
 * The file is opened for reading, read with BINARY encoding (one character per
 * byte), closed and then removed.
 *
 * A run opens on the first byte below 16 and closes on the next byte above 128;
 * its end index is the byte just before that bright one. Bytes in between
 * (16..128) neither open nor close a run. A run that is still open when the data
 * ends is closed on the last index, so every returned tile is a complete
 * [start, end] pair.
 *
 * @param {File} f  File object exposing exists/open/encoding/read/close/remove.
 * @returns {Array} Array of [start, end] index pairs; empty when the file does
 *                  not exist or contains no dark byte.
 */
function findTiles(f) {
    // A missing file yields "no tiles" instead of undefined, so callers can
    // test the result's length without crashing.
    if (!f.exists) return [];

    f.open('r');
    f.encoding = "BINARY";
    var content = f.read();
    f.close();
    f.remove();

    var tiles = [],
        last = content.length - 1,
        start = -1; // index where the currently open run began, -1 when none is open

    for (var i = 0; i <= last; i++) {
        var level = content.charCodeAt(i);
        if (start < 0) {
            if (level < 16) start = i;
        } else if (level > 128) {
            tiles.push([start, i - 1]);
            start = -1;
        }
    }

    // Close a run that reaches the end of the strip.
    if (start >= 0) tiles.push([start, last]);

    return tiles;
}
/**
 * Small Action Manager helper bound to one class of Photoshop object.
 *
 * Every method builds its ActionDescriptor/ActionReference objects in local
 * variables and runs them with DialogModes.NO.
 *
 * @param {string} target  String ID of the class the instance addresses
 *                         (the script uses 'application', 'document', 'layer').
 *                         A falsy value leaves the target as null.
 */
function AM(target) {
    var s2t = stringIDToTypeID,
        t2s = typeIDToStringID,
        c2t = charIDToTypeID;
    target = target ? s2t(target) : null;

    // Builds a reference to `property` (already a type ID) of the target class.
    // With `id` given (anything but null/undefined, so 0 is honoured) the object is
    // addressed by index when idxMode is truthy, otherwise by identifier; without
    // `id` the currently targeted object is used.
    function propertyRef(property, id, idxMode) {
        var r = new ActionReference();
        r.putProperty(s2t('property'), property);
        if (id != null) {
            if (idxMode) r.putIndex(target, id);
            else r.putIdentifier(target, id);
        } else {
            r.putEnumerated(target, s2t('ordinal'), s2t('targetEnum'));
        }
        return r;
    }

    /**
     * Returns the value of a property, unwrapped by getDescValue.
     * @param {string} property  String ID of the property.
     * @param {number} [id]      Index or identifier of the object; current target when omitted.
     * @param {boolean} [idxMode] Treat `id` as an index instead of an identifier.
     */
    this.getProperty = function (property, id, idxMode) {
        property = s2t(property);
        return getDescValue(executeActionGet(propertyRef(property, id, idxMode)), property);
    };

    /**
     * Tells whether the addressed object's descriptor has the given property key.
     * Parameters are the same as for getProperty.
     */
    this.hasProperty = function (property, id, idxMode) {
        property = s2t(property);
        return executeActionGet(propertyRef(property, id, idxMode)).hasKey(property);
    };

    // Runs the "copyToLayer" event.
    this.copyToLayer = function () {
        executeAction(s2t("copyToLayer"), undefined, DialogModes.NO);
    };

    // Runs the "invert" event.
    this.invert = function () {
        executeAction(s2t("invert"), new ActionDescriptor(), DialogModes.NO);
    };

    // Runs "thresholdClassEvent" with the given integer level.
    this.threshold = function (level) {
        var d = new ActionDescriptor();
        d.putInteger(s2t("level"), level);
        executeAction(s2t("thresholdClassEvent"), d, DialogModes.NO);
    };

    // Runs "trim". `mode` is the trimBasedOn enum value; the other four arguments
    // are written as booleans for the respective sides.
    this.trim = function (mode, top, bottom, left, right) {
        var d = new ActionDescriptor();
        d.putEnumerated(s2t("trimBasedOn"), s2t("trimBasedOn"), s2t(mode));
        d.putBoolean(s2t("top"), top);
        d.putBoolean(s2t("bottom"), bottom);
        d.putBoolean(s2t("left"), left);
        d.putBoolean(s2t("right"), right);
        executeAction(s2t("trim"), d, DialogModes.NO);
    };

    // Runs the "minimum" filter with a radius in pixels and a preserveShape enum value.
    this.filterMinimum = function (radius, mode) {
        var d = new ActionDescriptor();
        d.putUnitDouble(c2t("Rds "), s2t("pixelsUnit"), radius);
        d.putEnumerated(s2t("preserveShape"), s2t("preserveShape"), s2t(mode));
        executeAction(s2t("minimum"), d, DialogModes.NO);
    };

    // Runs "motionBlur" with an integer angle and a distance in pixels.
    this.motionBlur = function (angle, distance) {
        var d = new ActionDescriptor();
        d.putInteger(s2t("angle"), angle);
        d.putUnitDouble(s2t("distance"), s2t("pixelsUnit"), distance);
        executeAction(s2t("motionBlur"), d, DialogModes.NO);
    };

    // Runs "levels" on the composite channel; `levels` is the list of integers
    // written to the adjustment's "input" list.
    this.levels = function (levels) {
        var d = new ActionDescriptor(),
            channelRef = new ActionReference(),
            adjustment = new ActionDescriptor(),
            input = new ActionList(),
            adjustments = new ActionList();
        d.putEnumerated(s2t("presetKind"), s2t("presetKindType"), s2t("presetKindCustom"));
        channelRef.putEnumerated(s2t("channel"), s2t("channel"), s2t("composite"));
        adjustment.putReference(s2t("channel"), channelRef);
        for (var i = 0; i < levels.length; i++) input.putInteger(levels[i]);
        adjustment.putList(s2t("input"), input);
        adjustments.putObject(s2t("levelsAdjustment"), adjustment);
        d.putList(s2t("adjustment"), adjustments);
        executeAction(s2t("levels"), d, DialogModes.NO);
    };

    // Runs "convertMode" to grayscaleMode.
    this.convertToGrayscale = function () {
        var d = new ActionDescriptor();
        d.putClass(s2t("to"), s2t("grayscaleMode"));
        executeAction(s2t("convertMode"), d, DialogModes.NO);
    };

    // Runs "flattenImage".
    this.flatten = function () {
        executeAction(s2t("flattenImage"), new ActionDescriptor(), DialogModes.NO);
    };

    // Runs "delete" on the current history state.
    this.stepBack = function () {
        var r = new ActionReference(),
            d = new ActionDescriptor();
        r.putProperty(c2t("HstS"), s2t("currentHistoryState"));
        d.putReference(s2t("target"), r);
        executeAction(s2t("delete"), d, DialogModes.NO);
    };

    // Sets the selection to a rectangle; all four edges are in pixels.
    this.makeSelection = function (top, left, bottom, right) {
        var r = new ActionReference(),
            d = new ActionDescriptor(),
            rect = new ActionDescriptor();
        r.putProperty(s2t("channel"), s2t("selection"));
        d.putReference(s2t("null"), r);
        rect.putUnitDouble(s2t("top"), s2t("pixelsUnit"), top);
        rect.putUnitDouble(s2t("left"), s2t("pixelsUnit"), left);
        rect.putUnitDouble(s2t("bottom"), s2t("pixelsUnit"), bottom);
        rect.putUnitDouble(s2t("right"), s2t("pixelsUnit"), right);
        d.putObject(s2t("to"), s2t("rectangle"), rect);
        executeAction(s2t("set"), d, DialogModes.NO);
    };

    // Runs "crop" with delete = true.
    this.crop = function () {
        var d = new ActionDescriptor();
        d.putBoolean(s2t("delete"), true);
        executeAction(s2t("crop"), d, DialogModes.NO);
    };

    // Runs "save" in rawFormat to file `f`.
    // NOTE(review): "copy" is written into the format descriptor here, whereas
    // saveToPNG writes it into the save descriptor - kept as is, confirm intent.
    this.saveToRAW = function (f) {
        var format = new ActionDescriptor(),
            d = new ActionDescriptor();
        format.putBoolean(s2t('copy'), true);
        d.putObject(s2t("as"), s2t("rawFormat"), format);
        d.putPath(s2t("in"), f);
        executeAction(s2t("save"), d, DialogModes.NO);
    };

    // Runs "save" as a PNG copy to "<pth>/<title>.png".
    this.saveToPNG = function (title, pth) {
        var d = new ActionDescriptor();
        d.putObject(s2t("as"), s2t("PNGFormat"), new ActionDescriptor());
        d.putPath(s2t("in"), new File(pth + '/' + title + '.png'));
        d.putBoolean(s2t("copy"), true);
        executeAction(s2t("save"), d, DialogModes.NO);
    };

    // Runs "duplicate" on the current target, naming the copy `title`.
    this.duplicate = function (title) {
        var r = new ActionReference(),
            d = new ActionDescriptor();
        r.putEnumerated(target, s2t("ordinal"), s2t("targetEnum"));
        d.putReference(s2t("null"), r);
        d.putString(s2t("name"), title);
        executeAction(s2t("duplicate"), d, DialogModes.NO);
    };

    // Sets the selection from the channel with the given string ID (e.g. 'RGB').
    this.selectionFromChannel = function (channel) {
        var selectionRef = new ActionReference(),
            d = new ActionDescriptor(),
            channelRef = new ActionReference();
        selectionRef.putProperty(s2t("channel"), s2t("selection"));
        d.putReference(s2t("null"), selectionRef);
        channelRef.putEnumerated(s2t("channel"), s2t("channel"), s2t(channel));
        d.putReference(s2t("to"), channelRef);
        executeAction(s2t("set"), d, DialogModes.NO);
    };

    // Runs "inverse".
    this.inverseSelection = function () {
        executeAction(s2t("inverse"), undefined, DialogModes.NO);
    };

    // Runs "close"; `yesNo` is the yesNo enum value for "saving" (the script passes 'no').
    this.close = function (yesNo) {
        var d = new ActionDescriptor();
        d.putEnumerated(s2t("saving"), s2t("yesNo"), s2t(yesNo));
        executeAction(s2t("close"), d, DialogModes.NO);
    };

    // Runs "delete" on the current target.
    this.deleteLayer = function () {
        var r = new ActionReference(),
            d = new ActionDescriptor();
        r.putEnumerated(target, s2t('ordinal'), s2t('targetEnum'));
        d.putReference(s2t("null"), r);
        executeAction(s2t("delete"), d, DialogModes.NO);
    };

    // Reads key `p` from descriptor `d` with the getter matching its value type.
    // Object and enumerated values come back as { type, value }; value types not
    // listed here yield undefined.
    function getDescValue(d, p) {
        switch (d.getType(p)) {
            case DescValueType.OBJECTTYPE: return { type: t2s(d.getObjectType(p)), value: d.getObjectValue(p) };
            case DescValueType.LISTTYPE: return d.getList(p);
            case DescValueType.REFERENCETYPE: return d.getReference(p);
            case DescValueType.BOOLEANTYPE: return d.getBoolean(p);
            case DescValueType.STRINGTYPE: return d.getString(p);
            case DescValueType.INTEGERTYPE: return d.getInteger(p);
            case DescValueType.LARGEINTEGERTYPE: return d.getLargeInteger(p);
            case DescValueType.DOUBLETYPE: return d.getDouble(p);
            case DescValueType.ALIASTYPE: return d.getPath(p);
            case DescValueType.CLASSTYPE: return d.getClass(p);
            case DescValueType.UNITDOUBLE: return d.getUnitDoubleValue(p);
            case DescValueType.ENUMERATEDTYPE: return { type: t2s(d.getEnumerationType(p)), value: t2s(d.getEnumerationValue(p)) };
            default: break;
        }
    }
}

Report · Aug 28, 2023

@erol5ECF, please reply in the thread, not in private messages. It doesn't matter to you, but a public dialogue can help other users.

In the topic, you were asked several times to provide the original image with which you will work. Like @c.pfaffenbichler, I waited several hours for clarifications from you, but none came, so I decided that all the remaining images look the same as your screenshots.

If the script does not work on other images, then most likely its parameters need to be adjusted. Initially, I used the following values, which you can see in the code:

MINIMUM_RADIUS = 30
BLUR_DISTANCE = 200;

The script initially trims the black edges. If your original image does not contain them, then remove or comment out the line:

lr.trim('topLeftPixelColor', 1, 1, 1, 1);

(it is better not to delete the parent function, since it affects the number of history rollbacks performed by the script)

Then I execute the command filters -> other -> minimum. My goal is to get evenly black blocks of text. In your example, a radius of 30 was enough for me, but perhaps a larger value should be set. Try to execute the minimum function on other images and find the optimal constant value.

Next, I do motion blur. The purpose of this action is to get black continuous stripes from the left side of the image that separate each block of text. The right side of the image does not interest me at the moment. I used a distance of 200 (BLUR_DISTANCE), but you can also try changing this setting.

After applying levels with a black point of 195 and a white point of 220, I get this image:

lr.levels([195, 220])

From it, I only need the left column 1 pixel wide and image height (I expanded it for an example)

Further, the script simply looks for the boundaries of the black blocks and splits the original image based on them. If you see that a column of pixels contains artifacts or gaps, you can move the selection position closer to the center of the image, as long as it is 1 pixel wide

doc.makeSelection(0, 0, docH, 1); //(top, left, bottom, right)

I think you have received enough information to independently correct the script on the original images.

Report · Aug 29, 2023

Sorry, this is my first location, it took me a while to figure out how it works. 🙂 I'll try and post back here

Report · Aug 29, 2023

The process works in the sample file I uploaded, but I did not upload the original file because I am a beginner.
I get output like this,

I tried the settings but could not reach the result.

Files I Will Process

It works great in the picture I added here, but since I'm a beginner, I had to upload my real file, I took the screenshot and uploaded it 😞 so it didn't work in my file.

Report · Aug 29, 2023

Post the files with questions 6 and 7. I see 1-4 and 16-20.

If you change the script parameters to

MINIMUM_RADIUS = 60,
BLUR_DISTANCE = 500;

then they are processed quite correctly (not counting the footer, but as I understand it, it is not needed)

Unfortunately, Photoshop does not allow scripts to access the content of images. I have to use indirect methods to segment the image, keeping the balance between script execution time and the result obtained. It takes some patience to find the optimal parameters, or a fundamentally different approach. Perhaps someone else will come up with a better solution.

Report · Aug 29, 2023

You are a true hero, I am so grateful.

I'm asking just to be sure:
MINIMUM_RADIUS = x,
BLUR_DISTANCE = x;
I can customize it just by playing with these settings, right?

Report · Aug 29, 2023

Yes, sure. I intentionally made them into constants to make it easier to edit. Above, I explained what they do and what result you should strive for.

Report · Aug 29, 2023

thank you so much you saved me from a big burden

Report · Aug 29, 2023

And please remember to mark the Correct Answer/s as such.

Report · Sep 02, 2023

It saddens me so much to be a burden to you again, but the app has been updated. The code no longer works. I reverted to old versions but it still didn't work (including apple silicon beta versions). Can you help me?

Report · Sep 02, 2023

The code is written to work with RGB images. After the update, your PDF import options were reset, so the files now open as CMYK. This can be fixed in the script code, but the easiest way is to find the color mode setting in the PDF import dialog and change CMYK to RGB.

Report · Sep 02, 2023

Thanks a lot

Automatic Tile Splitting

1 Correct answer

Explore related tutorials & articles