Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions spikes/uipath/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
node_modules/
.env
.venv/
uipath.log
*.pdf
467 changes: 467 additions & 0 deletions spikes/uipath/README.md

Large diffs are not rendered by default.

47 changes: 25 additions & 22 deletions spikes/uipath/extractDoc.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
import axios from "axios";
import {
duPost,
UIPATH_BASE_URL,
getExtractorGuid,
getProjectId,
} from "./uipathClient.js";

export async function extractDoc(token, docId) {
const extract = await axios.post(
`https://govcloud.uipath.us:443/${process.env.EXTRACTOR_GUID}/du_/api/framework/projects/${process.env.ZERO_PROJECT_ID}/extractors/generative_extractor/extraction/start?api-version=1.0`,
{
documentId: docId,
pageRange: null,
prompts: [
{
id: "State",
question: "What state is this 1115 waver for?",
fieldType: "Text",
multiValued: false,
},
],
configuration: null,
},
{
headers: {
Authorization: `Bearer ${token}`,
},
}
);
const extractorGuid = getExtractorGuid(); // NOTE: Zoe might make her own. So we may need to query here to get the right GUID
const projectId = getProjectId();
const url = `${UIPATH_BASE_URL}:443/${extractorGuid}/du_/api/framework/projects/${projectId}/extractors/generative_extractor/extraction/start`;

const extract = await duPost(url, token, {
documentId: docId,
pageRange: null,
prompts: activeQuestionBlobs,
configuration: null,
});
return extract.data.resultUrl;
}

// Prompts sent to the generative extractor as the `prompts` field of the
// extraction-start request. Each entry is one question asked of the document.
// The question text is passed through verbatim, so spelling matters:
// "waiver" was previously misspelled as "waver".
const activeQuestionBlobs = [
  {
    id: "State",
    question: "What state is this 1115 waiver for?",
    fieldType: "Text",
    multiValued: false,
  },
];
67 changes: 54 additions & 13 deletions spikes/uipath/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,71 @@ import { getToken } from "./getToken.js";
import { uploadDocument } from "./uploadDocument.js";
import { extractDoc } from "./extractDoc.js";
import { fetchExtractionResult } from "./fetchExtractResult.js";
import { createLogFile, log } from "./logFile.js";
import { getProjectId, getExtractorGuid } from "./uipathClient.js";

dotenv.config();

/**
 * Pause for roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

const token = await getToken();
console.log("Got the auth token.");
// Positional args: [0]=node, [1]=index.js, [2]=input file, [3]=log file
const inputFile = process.argv[2] || "ak-behavioral-health-demo-pa.pdf";
const logPath = createLogFile(process.argv[3] || process.env.LOG_FILE || "uipath.log");

const docId = await uploadDocument(token, "ak-behavioral-health-demo-pa.pdf");
console.log(`Document ID: ${docId}`);
// Fail fast with a usage hint when no input file is available.
// NOTE(review): inputFile is assigned with a fallback default file name, so
// this guard only fires if that fallback is removed or becomes empty.
if (!inputFile) {
  log("Usage: node index.js <input-file> [log-file]", logPath);
  // The throw ends execution here; the process.exit(1) that used to follow
  // it could never run and has been removed.
  throw new Error("Input file is required");
}

try {
// Validate required env early to avoid undefined IDs in URLs
getProjectId();
getExtractorGuid();

const resultUrl = await extractDoc(token, docId);
console.log(`Result URL: ${resultUrl}`);
const token = await getToken();
if (!token) {
throw new Error("No auth token received.");
}
log("Got the auth token.", logPath);

while (true) {
await sleep(1 * 1000);
let status = await fetchExtractionResult(token, resultUrl);
log(`Using input file: ${inputFile}`, logPath);

const docId = await uploadDocument(token, inputFile);
if (!docId || typeof docId !== "string") {
log(`Upload returned unexpected response: ${util.inspect(docId)}`, logPath);
throw new Error("Upload failed or returned invalid document ID.");
}
log(`Document ID: ${docId}`, logPath);

if (status.status === "Succeeded") {
console.log(util.inspect(status, false, null, true /* enable colors */));
break;
const resultUrl = await extractDoc(token, docId);
if (!resultUrl || typeof resultUrl !== "string") {
log(`Extraction start returned unexpected response: ${util.inspect(resultUrl)}`, logPath);
throw new Error("Extraction start failed or returned invalid result URL.");
}
log(`Result URL: ${resultUrl}`, logPath);

console.log(status);
let delayMs = 5_000; // start at 5s to reduce polling cost
const maxDelayMs = 30_000; // cap backoff at 30s
Comment on lines +53 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think UIPath charges for fetching the status, so this might not save any money. It will also increase the average polling overhead from 500ms to somewhere between 2.5 and 15 seconds.


while (true) {
await sleep(delayMs);
let status = await fetchExtractionResult(token, resultUrl);

if (status.status === "Succeeded") {
log(util.inspect(status, false, null, true /* enable colors */), logPath);
break;
}

log(status, logPath);
delayMs = Math.min(delayMs * 2, maxDelayMs); // exponential backoff to limit requests/credits
}
} catch (error) {
log(`Fatal error: ${error.message}`, logPath);
if (error.response?.data) {
log(`Response data: ${JSON.stringify(error.response.data)}`, logPath);
}
throw error;
}
30 changes: 30 additions & 0 deletions spikes/uipath/logFile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import fs from "fs";
import path from "path";

// Log file path used when a caller does not pass logPath.
const DEFAULT_LOG_PATH = "uipath.log";

/**
 * Make sure the directory that will contain `logPath` exists.
 *
 * Nothing is created when the path has no directory component (dirname is ".").
 *
 * @param {string} logPath - Path of the log file whose parent directory is needed.
 * @returns {void}
 */
function ensureParentDir(logPath) {
  const parent = path.dirname(logPath);
  if (!parent || parent === ".") {
    return;
  }
  // recursive: true creates missing ancestors and is a no-op if the directory exists.
  fs.mkdirSync(parent, { recursive: true });
}

/**
 * Prepare a log file and return its path.
 *
 * The parent directory is created if needed. An existing file is left as-is
 * unless `overwrite` is true, in which case it is truncated to empty.
 *
 * @param {string} [logPath=DEFAULT_LOG_PATH] - Where the log file should live.
 * @param {{ overwrite?: boolean }} [options] - Pass `overwrite: true` to empty an existing file.
 * @returns {string} The same `logPath` that was passed in.
 */
export function createLogFile(logPath = DEFAULT_LOG_PATH, { overwrite = false } = {}) {
  ensureParentDir(logPath);

  // Keep an existing log untouched unless the caller asked for a fresh one.
  if (!overwrite && fs.existsSync(logPath)) {
    return logPath;
  }

  fs.writeFileSync(logPath, "");
  return logPath;
}

/**
 * Append one line to the log file, creating its parent directory if needed.
 *
 * @param {*} message - Value to record. Strings are written verbatim; other
 *   values are converted to text by formatLogMessage.
 * @param {string} [logPath=DEFAULT_LOG_PATH] - File to append to.
 * @returns {void}
 */
export function appendToLog(message, logPath = DEFAULT_LOG_PATH) {
  ensureParentDir(logPath);
  fs.appendFileSync(logPath, `${formatLogMessage(message)}\n`);
}

/**
 * Turn any value into a single log string without ever throwing.
 *
 * @param {*} message - Value to format.
 * @returns {string} Text form of `message`.
 */
function formatLogMessage(message) {
  if (typeof message === "string") {
    return message;
  }
  // JSON.stringify(new Error("x")) yields "{}", which hides the failure.
  if (message instanceof Error) {
    return message.stack ?? String(message);
  }
  try {
    // JSON.stringify returns undefined for undefined/functions/symbols.
    return JSON.stringify(message) ?? String(message);
  } catch {
    // Circular structures and BigInt make JSON.stringify throw; a logger
    // should degrade to a plain string rather than crash the caller.
    return String(message);
  }
}

/**
 * Write a message to the console and append it to the log file.
 *
 * @param {*} message - Value to print and record.
 * @param {string} [logPath=DEFAULT_LOG_PATH] - File to append to.
 * @returns {void}
 */
export function log(message, logPath = DEFAULT_LOG_PATH) {
  // Console output happens first, so the message is still visible if the
  // file append throws.
  console.log(message);
  appendToLog(message, logPath);
}
1 change: 0 additions & 1 deletion spikes/uipath/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions spikes/uipath/uipathClient.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import axios from "axios";

// Scheme and host for UiPath requests (no trailing slash; callers append the path).
export const UIPATH_BASE_URL = "https://govcloud.uipath.us";
// Path segment placed directly after the base URL when building the digitization URL.
export const UIPATH_TENANT = "globalalliant/Dev";
// Sent as the "api-version" query parameter by duPost.
export const UIPATH_API_VERSION = "1.0";

/**
 * Read the project ID from the ZERO_PROJECT_ID environment variable.
 *
 * @returns {string} The configured project ID.
 * @throws {Error} When ZERO_PROJECT_ID is unset or empty.
 */
export function getProjectId() {
  const { ZERO_PROJECT_ID: projectId } = process.env;
  if (projectId) {
    return projectId;
  }
  throw new Error("Missing ZERO_PROJECT_ID in environment.");
}

/**
 * Read the extractor GUID from the EXTRACTOR_GUID environment variable.
 *
 * Author's note: the GUID is basically the model being used. Different files
 * may want different models, so this may eventually need a request that
 * checks which extractors are available.
 *
 * @returns {string} The configured extractor GUID.
 * @throws {Error} When EXTRACTOR_GUID is unset or empty.
 */
export function getExtractorGuid() {
  const { EXTRACTOR_GUID: extractorGuid } = process.env;
  if (extractorGuid) {
    return extractorGuid;
  }
  throw new Error("Missing EXTRACTOR_GUID in environment.");
}

/**
 * duPost == Document Understanding POST.
 *
 * Thin wrapper over axios.post that adds the bearer token header and the
 * "api-version" query parameter. Caller-supplied headers and params are
 * spread after the defaults, so they win on key collisions; any remaining
 * options are passed through to axios unchanged.
 *
 * @param {string} url - Endpoint to POST to.
 * @param {string} token - Token placed in the Authorization header.
 * @param {*} data - Request body handed to axios.
 * @param {object} [options] - Extra axios request options (`headers`, `params`, ...).
 * @returns {Promise<*>} Whatever axios.post returns.
 */
export function duPost(url, token, data, options = {}) {
  const { params = {}, headers = {}, ...axiosOptions } = options;

  const mergedHeaders = {
    Authorization: `Bearer ${token}`,
    ...headers,
  };
  const mergedParams = {
    "api-version": UIPATH_API_VERSION,
    ...params,
  };

  return axios.post(url, data, {
    headers: mergedHeaders,
    params: mergedParams,
    ...axiosOptions,
  });
}
43 changes: 23 additions & 20 deletions spikes/uipath/uploadDocument.js
Original file line number Diff line number Diff line change
@@ -1,33 +1,36 @@
import axios from "axios";
import fs from "fs";
import FormData from "form-data";
import { log } from "./logFile.js";
import { duPost, UIPATH_BASE_URL, UIPATH_TENANT, getProjectId } from "./uipathClient.js";

export async function uploadDocument(token, fileName) {
const projectId = getProjectId();
const url = `${UIPATH_BASE_URL}/${UIPATH_TENANT}/du_/api/framework/projects/${projectId}/digitization/start`;

export async function uploadDocument(token,fileName) {
const formData = new FormData();
formData.append(
"file",
fs.createReadStream(fileName),
fileName
);

const doc = await axios.post(
`https://govcloud.uipath.us/globalalliant/Dev/du_/api/framework/projects/${process.env.ZERO_PROJECT_ID}/digitization/start?api-version=1.0`,
formData,
{
formData.append("file", fs.createReadStream(fileName), fileName);

try {
const doc = await duPost(url, token, formData, {
headers: {
"Content-Type": "multipart/form-data", // Important for file uploads
// form-data requires its own headers so axios can set boundaries
...formData.getHeaders(),
"x-uipath-page-range": "All",
Authorization: `Bearer ${token}`,
},
onUploadProgress: (progressEvent) => {
// Optional: Track upload progress
const percentCompleted = Math.round(
(progressEvent.loaded * 100) / progressEvent.total
);
console.log(`Upload progress: ${percentCompleted}%`);
const percentCompleted = Math.round((progressEvent.loaded * 100) / progressEvent.total);
log(`Upload progress: ${percentCompleted}%`);
},
}
);
});

return doc.data.documentId;
return doc?.data?.documentId || doc?.data;
} catch (error) {
log(`Error uploading document: ${error.message}`);
if (error.response?.data) {
log(`Upload error response: ${JSON.stringify(error.response.data)}`);
}
throw error;
}
}