Entra / Microsoft 365 · SharePoint & OneDrive
Export Word document to PDF
Cleans DOCX fonts, uploads to OneDrive via Graph, exports through Word Online, and produces a print-optimized PDF.
Connect & set up
Run this once per session. The requested scopes are read-write because the script uploads a file to OneDrive before exporting it.
# Sign in to Microsoft Graph with the delegated scopes the pipeline requests; -NoWelcome hides the banner.
Connect-MgGraph -NoWelcome -Scopes @("Files.ReadWrite", "Sites.ReadWrite.All")
Run it
The main script. As shown here it expects $DocxPath, $OutputFolder, and $OneDrivePath to be defined before it runs. Copy it, or download the .ps1 and run it from your console.
# KDP document pipeline
#   1. Replace Lucida* font names inside the DOCX package with Consolas.
#   2. Upload the cleaned DOCX to OneDrive and export it to PDF through Microsoft Graph.
#   3. Strip page annotations (hyperlinks etc.) from the PDF for print.
#
# Inputs (must be defined by the caller before this script runs):
#   $DocxPath      - path to the source .docx file
#   $OutputFolder  - folder that receives Cleaned.docx, CloudExport.pdf and KDP_PrintReady.pdf
#   $OneDrivePath  - folder path below the OneDrive root (may be empty for the root itself)

Write-Host "Starting KDP document pipeline..." -ForegroundColor Cyan

# Fail early with a clear message instead of a confusing error further down.
if ([string]::IsNullOrWhiteSpace($DocxPath) -or -not (Test-Path -LiteralPath $DocxPath -PathType Leaf)) {
    throw 'Source DOCX not found; set $DocxPath to an existing .docx file.'
}
if ([string]::IsNullOrWhiteSpace($OutputFolder)) {
    throw 'Set $OutputFolder to the folder that should receive the output files.'
}
if (-not (Test-Path -LiteralPath $OutputFolder -PathType Container)) {
    New-Item -ItemType Directory -Path $OutputFolder -Force | Out-Null
}

# .NET file APIs resolve relative paths against the process directory, not the
# PowerShell location, so work with absolute paths throughout.
$sourceDocx = (Resolve-Path -LiteralPath $DocxPath).ProviderPath
$outDir     = (Resolve-Path -LiteralPath $OutputFolder).ProviderPath

$CleanDocx = Join-Path $outDir "Cleaned.docx"
$CloudPdf  = Join-Path $outDir "CloudExport.pdf"
$PrintPdf  = Join-Path $outDir "KDP_PrintReady.pdf"

# -----------------------------
# 1. CLEAN FONTS IN DOCX
# -----------------------------
Write-Host "Cleaning fonts inside DOCX (removing Lucida*)" -ForegroundColor Green

# Expand-Archive / Compress-Archive only accept a .zip extension, so the package
# is edited in place with ZipArchive instead of being unpacked and rebuilt.
Add-Type -AssemblyName System.IO.Compression
Add-Type -AssemblyName System.IO.Compression.FileSystem

Copy-Item -LiteralPath $sourceDocx -Destination $CleanDocx -Force

# OOXML parts are UTF-8; write them back without a BOM.
$utf8NoBom = New-Object System.Text.UTF8Encoding($false)

# Package parts that can carry font names (entry names always use forward slashes).
$xmlTargets = @(
    'word/styles.xml'
    'word/document.xml'
    'word/fontTable.xml'
)

$package = [System.IO.Compression.ZipFile]::Open($CleanDocx, [System.IO.Compression.ZipArchiveMode]::Update)
try {
    foreach ($partName in $xmlTargets) {
        $entry = $package.GetEntry($partName)
        if ($null -eq $entry) { continue }   # part is optional; skip when absent

        $partStream = $entry.Open()
        try {
            # leaveOpen = $true so the same stream can be rewritten afterwards.
            $partReader = New-Object System.IO.StreamReader($partStream, $utf8NoBom, $true, 4096, $true)
            $xml = $partReader.ReadToEnd()
            $partReader.Dispose()

            $cleaned = $xml -replace 'Lucida[^<"]*', 'Consolas'

            if ($cleaned -cne $xml) {
                # Truncate and rewrite the part with the cleaned XML.
                $partStream.Position = 0
                $partStream.SetLength(0)
                $partWriter = New-Object System.IO.StreamWriter($partStream, $utf8NoBom)
                $partWriter.Write($cleaned)
                $partWriter.Flush()
            }
        }
        finally {
            $partStream.Dispose()
        }
    }
}
finally {
    # Disposing the archive commits the updated entries to Cleaned.docx.
    $package.Dispose()
}

Write-Host "DOCX cleaned and rebuilt → $CleanDocx" -ForegroundColor Green

# -----------------------------
# 2. EXPORT DOCX → PDF USING WORD ONLINE (Graph)
# -----------------------------
Write-Host "Connecting to Microsoft Graph..." -ForegroundColor Cyan
Connect-MgGraph -Scopes "Files.ReadWrite", "Sites.ReadWrite.All" -NoWelcome

# Upload file to OneDrive
Write-Host "Uploading DOCX to OneDrive..." -ForegroundColor Green

# Use the signed-in user's default drive; listing drives can return more than one,
# which would turn the drive id into an array.
$drive   = Invoke-MgGraphRequest -Method GET -Uri "https://graph.microsoft.com/v1.0/me/drive"
$driveId = $drive['id']
if (-not $driveId) {
    throw "Could not resolve the signed-in user's default OneDrive."
}

$filename = Split-Path $CleanDocx -Leaf

# Build the remote path from non-empty segments so an empty or slash-padded
# $OneDrivePath cannot produce "//" in the address.
$remoteSegments = @("$OneDrivePath" -split '[\\/]' | Where-Object { $_ }) + $filename
$remotePath     = $remoteSegments -join '/'
# Escape each segment separately: '/' must stay a separator and spaces must become %20.
$encodedPath    = ($remoteSegments | ForEach-Object { [Uri]::EscapeDataString($_) }) -join '/'

# ${...} is required here: "$remotePath:" would be parsed as a scope-qualified variable.
$uploadSession = New-MgDriveItemContentUploadSession `
    -DriveId $driveId `
    -DriveItemId "root:/${remotePath}:" `
    -AdditionalProperties @{}

# Upload in one shot. An upload session requires a Content-Range header even when
# the whole file is sent in a single request.
# NOTE(review): Graph limits a single fragment to < 60 MiB; larger files need chunking.
$docxLength = (Get-Item -LiteralPath $CleanDocx).Length
Invoke-RestMethod -Uri $uploadSession.UploadUrl -Method PUT -InFile $CleanDocx `
    -ContentType "application/vnd.openxmlformats-officedocument.wordprocessingml.document" `
    -Headers @{ 'Content-Range' = "bytes 0-$($docxLength - 1)/$docxLength" }

Write-Host "Exporting to PDF via Word Online..." -ForegroundColor Green

# Let the cmdlet stream the converted file straight to disk.
Invoke-MgGraphRequest `
    -Uri "https://graph.microsoft.com/v1.0/me/drive/root:/${encodedPath}:/content?format=pdf" `
    -Method GET `
    -OutputFilePath $CloudPdf

Write-Host "PDF export complete → $CloudPdf" -ForegroundColor Green

# -----------------------------
# 3. OPTIMIZE PDF FOR PRINT
# -----------------------------
Write-Host "Optimizing PDF for print (removing hyperlinks & annotations)..." -ForegroundColor Cyan

# The iTextSharp assembly must already be loaded (e.g. Add-Type -Path <itextsharp.dll>);
# when it is not, the catch block below takes over.
try {
    $pdfReader = $null
    $outStream = $null
    try {
        $pdfReader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $CloudPdf
        $pageCount = $pdfReader.NumberOfPages
        $outStream = [System.IO.File]::Create($PrintPdf)
        $stamper   = New-Object iTextSharp.text.pdf.PdfStamper -ArgumentList $pdfReader, $outStream

        for ($i = 1; $i -le $pageCount; $i++) {
            $page = $pdfReader.GetPageN($i)
            $page.Remove([iTextSharp.text.pdf.PdfName]::ANNOTS)
        }

        $stamper.Close()
    }
    finally {
        # Release the handles even on failure so the fallback can write $PrintPdf.
        if ($outStream) { $outStream.Dispose() }
        if ($pdfReader) { $pdfReader.Close() }
    }
}
catch {
    Write-Host "iTextSharp optimisation failed — using fallback scrubber." -ForegroundColor Yellow

    # Fallback: blank out inline /Annots arrays directly in the file.
    # ISO-8859-1 maps every byte to one char and back, so the binary content survives;
    # matches are overwritten with spaces of equal length so xref byte offsets stay valid.
    # Limitation: annotations referenced indirectly (/Annots n 0 R) or stored inside
    # compressed object streams are not reached by this pattern.
    $latin1 = [System.Text.Encoding]::GetEncoding(28591)
    $pdf = $latin1.GetString([System.IO.File]::ReadAllBytes($CloudPdf))
    $pdf = [regex]::Replace(
        $pdf,
        '/Annots\s*\[[^\]]*\]',
        [System.Text.RegularExpressions.MatchEvaluator] { param($m) ' ' * $m.Length }
    )
    [System.IO.File]::WriteAllBytes($PrintPdf, $latin1.GetBytes($pdf))
}

Write-Host "Print‑optimized PDF created → $PrintPdf" -ForegroundColor Green
Write-Host "`nPipeline complete. KDP-ready PDF is available." -ForegroundColor Cyan
Attribution
Author
Office365itpros